sqlglot.parser
from __future__ import annotations

import itertools
import logging
import re
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, TokenError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
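

# Illustrative sketch (not part of the parser logic): these build_* helpers are the
# callbacks registered in Parser.FUNCTIONS below. When a function call such as LOG(2, x)
# is parsed, its already-parsed arguments are handed to the matching builder, which
# returns the canonical expression node, e.g. roughly:
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> sqlglot.parse_one("SELECT LOG(2, x)").find(exp.Log) is not None
#   True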


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
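
    # Illustrative usage (a sketch; in practice you would usually go through sqlglot.parse
    # or a Dialect, which pairs the right Tokenizer with the right Parser subclass):
    #
    #   >>> from sqlglot.tokens import Tokenizer
    #   >>> from sqlglot.parser import Parser
    #   >>> sql = "SELECT a FROM t"
    #   >>> trees = Parser().parse(Tokenizer().tokenize(sql), sql)  # one tree per statement
    #   >>> isinstance(trees[0], exp.Select)
    #   True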

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
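
    # Illustrative note: dialect-specific parsers typically subclass Parser and extend these
    # lookup tables rather than replacing them, e.g. roughly (names here are hypothetical):
    #
    #   class MyDialect(Dialect):
    #       class Parser(parser.Parser):
    #           FUNCTIONS = {
    #               **parser.Parser.FUNCTIONS,
    #               "MY_FUNC": lambda args: exp.Anonymous(this="MY_FUNC", expressions=args),
    #           }
    #
    # The same pattern applies to most of the *_PARSERS / *_TOKENS collections below.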

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOGRAPHYPOINT,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEMANTIC_VIEW,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.SESSION,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.LOCK,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *ALTERABLES,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.UTC_DATE,
        TokenType.UTC_TIME,
        TokenType.UTC_TIMESTAMP,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.build_cast(
            strict=self.STRICT_CAST, this=this, to=to
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    CAST_COLUMN_OPERATORS = {
        TokenType.DOTCOLON,
        TokenType.DCOLON,
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.GrantPrincipal: lambda self: self._parse_grant_principal(),
        exp.GrantPrivilege: lambda self: self._parse_grant_privilege(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.REVOKE: lambda self: self._parse_revoke(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
        TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys),
        TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys),
        TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath),
    }

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If there's no parenthesis after the keyword, we should instead parse this as an identifier
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized to the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)
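
    # Illustrative sketch of the canonicalization above (node reprs vary by sqlglot version):
    # both the Hive-style `PARTITION BY (BUCKET(16, c))` and the Trino-style
    # `PARTITION BY (BUCKET(c, 16))` are expected to end up as roughly
    # PartitionedByBucket(this=<column c>, expression=<literal 16>).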

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
    QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS)

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.RANGE, TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    # Dialects like Databricks support JOINs without join criteria.
    # Adding an ON TRUE makes transpilation semantically correct for other dialects
    ADD_JOIN_ON_TRUE = False

    # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]'
    # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND`
    SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
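
    # Illustrative example of parse_into (a sketch; the available target types are the
    # keys of EXPRESSION_PARSERS above, and exact node reprs vary by version):
    #
    #   >>> from sqlglot.tokens import Tokenizer
    #   >>> sql = "x = 1 AND y"
    #   >>> [tree] = Parser().parse_into(exp.Condition, Tokenizer().tokenize(sql), sql)
    #   >>> isinstance(tree, exp.And)
    #   True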
1619 sql: The original SQL string, used to produce helpful debug messages. 1620 1621 Returns: 1622 The list of the produced syntax trees. 1623 """ 1624 return self._parse( 1625 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1626 ) 1627 1628 def parse_into( 1629 self, 1630 expression_types: exp.IntoType, 1631 raw_tokens: t.List[Token], 1632 sql: t.Optional[str] = None, 1633 ) -> t.List[t.Optional[exp.Expression]]: 1634 """ 1635 Parses a list of tokens into a given Expression type. If a collection of Expression 1636 types is given instead, this method will try to parse the token list into each one 1637 of them, stopping at the first for which the parsing succeeds. 1638 1639 Args: 1640 expression_types: The expression type(s) to try and parse the token list into. 1641 raw_tokens: The list of tokens. 1642 sql: The original SQL string, used to produce helpful debug messages. 1643 1644 Returns: 1645 The target Expression. 1646 """ 1647 errors = [] 1648 for expression_type in ensure_list(expression_types): 1649 parser = self.EXPRESSION_PARSERS.get(expression_type) 1650 if not parser: 1651 raise TypeError(f"No parser registered for {expression_type}") 1652 1653 try: 1654 return self._parse(parser, raw_tokens, sql) 1655 except ParseError as e: 1656 e.errors[0]["into_expression"] = expression_type 1657 errors.append(e) 1658 1659 raise ParseError( 1660 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1661 errors=merge_errors(errors), 1662 ) from errors[-1] 1663 1664 def _parse( 1665 self, 1666 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1667 raw_tokens: t.List[Token], 1668 sql: t.Optional[str] = None, 1669 ) -> t.List[t.Optional[exp.Expression]]: 1670 self.reset() 1671 self.sql = sql or "" 1672 1673 total = len(raw_tokens) 1674 chunks: t.List[t.List[Token]] = [[]] 1675 1676 for i, token in enumerate(raw_tokens): 1677 if token.token_type == TokenType.SEMICOLON: 1678 if token.comments: 1679 chunks.append([token]) 1680 1681 if i < total - 1: 1682 chunks.append([]) 1683 else: 1684 chunks[-1].append(token) 1685 1686 expressions = [] 1687 1688 for tokens in chunks: 1689 self._index = -1 1690 self._tokens = tokens 1691 self._advance() 1692 1693 expressions.append(parse_method(self)) 1694 1695 if self._index < len(self._tokens): 1696 self.raise_error("Invalid expression / Unexpected token") 1697 1698 self.check_errors() 1699 1700 return expressions 1701 1702 def check_errors(self) -> None: 1703 """Logs or raises any found errors, depending on the chosen error level setting.""" 1704 if self.error_level == ErrorLevel.WARN: 1705 for error in self.errors: 1706 logger.error(str(error)) 1707 elif self.error_level == ErrorLevel.RAISE and self.errors: 1708 raise ParseError( 1709 concat_messages(self.errors, self.max_errors), 1710 errors=merge_errors(self.errors), 1711 ) 1712 1713 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1714 """ 1715 Appends an error in the list of recorded errors or raises it, depending on the chosen 1716 error level setting. 1717 """ 1718 token = token or self._curr or self._prev or Token.string("") 1719 start = token.start 1720 end = token.end + 1 1721 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1722 highlight = self.sql[start:end] 1723 end_context = self.sql[end : end + self.error_message_context] 1724 1725 error = ParseError.new( 1726 f"{message}. 
Line {token.line}, Col: {token.col}.\n" 1727 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1728 description=message, 1729 line=token.line, 1730 col=token.col, 1731 start_context=start_context, 1732 highlight=highlight, 1733 end_context=end_context, 1734 ) 1735 1736 if self.error_level == ErrorLevel.IMMEDIATE: 1737 raise error 1738 1739 self.errors.append(error) 1740 1741 def expression( 1742 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1743 ) -> E: 1744 """ 1745 Creates a new, validated Expression. 1746 1747 Args: 1748 exp_class: The expression class to instantiate. 1749 comments: An optional list of comments to attach to the expression. 1750 kwargs: The arguments to set for the expression along with their respective values. 1751 1752 Returns: 1753 The target expression. 1754 """ 1755 instance = exp_class(**kwargs) 1756 instance.add_comments(comments) if comments else self._add_comments(instance) 1757 return self.validate_expression(instance) 1758 1759 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1760 if expression and self._prev_comments: 1761 expression.add_comments(self._prev_comments) 1762 self._prev_comments = None 1763 1764 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1765 """ 1766 Validates an Expression, making sure that all its mandatory arguments are set. 1767 1768 Args: 1769 expression: The expression to validate. 1770 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1771 1772 Returns: 1773 The validated expression. 1774 """ 1775 if self.error_level != ErrorLevel.IGNORE: 1776 for error_message in expression.error_messages(args): 1777 self.raise_error(error_message) 1778 1779 return expression 1780 1781 def _find_sql(self, start: Token, end: Token) -> str: 1782 return self.sql[start.start : end.end + 1] 1783 1784 def _is_connected(self) -> bool: 1785 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1786 1787 def _advance(self, times: int = 1) -> None: 1788 self._index += times 1789 self._curr = seq_get(self._tokens, self._index) 1790 self._next = seq_get(self._tokens, self._index + 1) 1791 1792 if self._index > 0: 1793 self._prev = self._tokens[self._index - 1] 1794 self._prev_comments = self._prev.comments 1795 else: 1796 self._prev = None 1797 self._prev_comments = None 1798 1799 def _retreat(self, index: int) -> None: 1800 if index != self._index: 1801 self._advance(index - self._index) 1802 1803 def _warn_unsupported(self) -> None: 1804 if len(self._tokens) <= 1: 1805 return 1806 1807 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1808 # interested in emitting a warning for the one being currently processed. 1809 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1810 1811 logger.warning( 1812 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1813 ) 1814 1815 def _parse_command(self) -> exp.Command: 1816 self._warn_unsupported() 1817 return self.expression( 1818 exp.Command, 1819 comments=self._prev_comments, 1820 this=self._prev.text.upper(), 1821 expression=self._parse_string(), 1822 ) 1823 1824 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1825 """ 1826 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
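# ---------------------------------------------------------------------------
# Editor's aside: an illustrative sketch, not part of the sqlglot source, of
# how the entry points defined above (parse, parse_into, raise_error) are
# typically driven through a Dialect. Only public sqlglot APIs are used; the
# SQL strings are arbitrary examples.
import sqlglot
from sqlglot import exp
from sqlglot.dialects import Dialect
from sqlglot.errors import ErrorLevel, ParseError

dialect = Dialect.get_or_raise("duckdb")
sql = "SELECT 1; SELECT 2"

# Dialect.parser() instantiates this Parser class; parse() returns one tree per statement
trees = dialect.parser(error_level=ErrorLevel.RAISE).parse(dialect.tokenize(sql), sql)
assert len(trees) == 2

# parse_one(..., into=...) routes through parse_into, trying each candidate type in turn
table = sqlglot.parse_one("catalog.db.tbl", into=exp.Table)
assert table.name == "tbl" and table.db == "db" and table.catalog == "catalog"

# raise_error records line/col and a highlighted slice on every ParseError
try:
    sqlglot.parse_one("SELECT foo( FROM bar")
except ParseError as e:
    info = e.errors[0]
    print(info["line"], info["col"], info["highlight"])
# ---------------------------------------------------------------------------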
1827 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1828 solve this by setting & resetting the parser state accordingly 1829 """ 1830 index = self._index 1831 error_level = self.error_level 1832 1833 self.error_level = ErrorLevel.IMMEDIATE 1834 try: 1835 this = parse_method() 1836 except ParseError: 1837 this = None 1838 finally: 1839 if not this or retreat: 1840 self._retreat(index) 1841 self.error_level = error_level 1842 1843 return this 1844 1845 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1846 start = self._prev 1847 exists = self._parse_exists() if allow_exists else None 1848 1849 self._match(TokenType.ON) 1850 1851 materialized = self._match_text_seq("MATERIALIZED") 1852 kind = self._match_set(self.CREATABLES) and self._prev 1853 if not kind: 1854 return self._parse_as_command(start) 1855 1856 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1857 this = self._parse_user_defined_function(kind=kind.token_type) 1858 elif kind.token_type == TokenType.TABLE: 1859 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1860 elif kind.token_type == TokenType.COLUMN: 1861 this = self._parse_column() 1862 else: 1863 this = self._parse_id_var() 1864 1865 self._match(TokenType.IS) 1866 1867 return self.expression( 1868 exp.Comment, 1869 this=this, 1870 kind=kind.text, 1871 expression=self._parse_string(), 1872 exists=exists, 1873 materialized=materialized, 1874 ) 1875 1876 def _parse_to_table( 1877 self, 1878 ) -> exp.ToTableProperty: 1879 table = self._parse_table_parts(schema=True) 1880 return self.expression(exp.ToTableProperty, this=table) 1881 1882 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1883 def _parse_ttl(self) -> exp.Expression: 1884 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1885 this = self._parse_bitwise() 1886 1887 if self._match_text_seq("DELETE"): 1888 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1889 if self._match_text_seq("RECOMPRESS"): 1890 return self.expression( 1891 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1892 ) 1893 if self._match_text_seq("TO", "DISK"): 1894 return self.expression( 1895 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1896 ) 1897 if self._match_text_seq("TO", "VOLUME"): 1898 return self.expression( 1899 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1900 ) 1901 1902 return this 1903 1904 expressions = self._parse_csv(_parse_ttl_action) 1905 where = self._parse_where() 1906 group = self._parse_group() 1907 1908 aggregates = None 1909 if group and self._match(TokenType.SET): 1910 aggregates = self._parse_csv(self._parse_set_item) 1911 1912 return self.expression( 1913 exp.MergeTreeTTL, 1914 expressions=expressions, 1915 where=where, 1916 group=group, 1917 aggregates=aggregates, 1918 ) 1919 1920 def _parse_statement(self) -> t.Optional[exp.Expression]: 1921 if self._curr is None: 1922 return None 1923 1924 if self._match_set(self.STATEMENT_PARSERS): 1925 comments = self._prev_comments 1926 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1927 stmt.add_comments(comments, prepend=True) 1928 return stmt 1929 1930 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1931 return self._parse_command() 1932 1933 expression = self._parse_expression() 1934 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1935 return 
self._parse_query_modifiers(expression) 1936 1937 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1938 start = self._prev 1939 temporary = self._match(TokenType.TEMPORARY) 1940 materialized = self._match_text_seq("MATERIALIZED") 1941 1942 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1943 if not kind: 1944 return self._parse_as_command(start) 1945 1946 concurrently = self._match_text_seq("CONCURRENTLY") 1947 if_exists = exists or self._parse_exists() 1948 1949 if kind == "COLUMN": 1950 this = self._parse_column() 1951 else: 1952 this = self._parse_table_parts( 1953 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1954 ) 1955 1956 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1957 1958 if self._match(TokenType.L_PAREN, advance=False): 1959 expressions = self._parse_wrapped_csv(self._parse_types) 1960 else: 1961 expressions = None 1962 1963 return self.expression( 1964 exp.Drop, 1965 exists=if_exists, 1966 this=this, 1967 expressions=expressions, 1968 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1969 temporary=temporary, 1970 materialized=materialized, 1971 cascade=self._match_text_seq("CASCADE"), 1972 constraints=self._match_text_seq("CONSTRAINTS"), 1973 purge=self._match_text_seq("PURGE"), 1974 cluster=cluster, 1975 concurrently=concurrently, 1976 ) 1977 1978 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1979 return ( 1980 self._match_text_seq("IF") 1981 and (not not_ or self._match(TokenType.NOT)) 1982 and self._match(TokenType.EXISTS) 1983 ) 1984 1985 def _parse_create(self) -> exp.Create | exp.Command: 1986 # Note: this can't be None because we've matched a statement parser 1987 start = self._prev 1988 1989 replace = ( 1990 start.token_type == TokenType.REPLACE 1991 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1992 or self._match_pair(TokenType.OR, TokenType.ALTER) 1993 ) 1994 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1995 1996 unique = self._match(TokenType.UNIQUE) 1997 1998 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1999 clustered = True 2000 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2001 "COLUMNSTORE" 2002 ): 2003 clustered = False 2004 else: 2005 clustered = None 2006 2007 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2008 self._advance() 2009 2010 properties = None 2011 create_token = self._match_set(self.CREATABLES) and self._prev 2012 2013 if not create_token: 2014 # exp.Properties.Location.POST_CREATE 2015 properties = self._parse_properties() 2016 create_token = self._match_set(self.CREATABLES) and self._prev 2017 2018 if not properties or not create_token: 2019 return self._parse_as_command(start) 2020 2021 concurrently = self._match_text_seq("CONCURRENTLY") 2022 exists = self._parse_exists(not_=True) 2023 this = None 2024 expression: t.Optional[exp.Expression] = None 2025 indexes = None 2026 no_schema_binding = None 2027 begin = None 2028 end = None 2029 clone = None 2030 2031 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2032 nonlocal properties 2033 if properties and temp_props: 2034 properties.expressions.extend(temp_props.expressions) 2035 elif temp_props: 2036 properties = temp_props 2037 2038 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2039 this = self._parse_user_defined_function(kind=create_token.token_type) 2040 2041 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2042 extend_props(self._parse_properties()) 2043 2044 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2045 extend_props(self._parse_properties()) 2046 2047 if not expression: 2048 if self._match(TokenType.COMMAND): 2049 expression = self._parse_as_command(self._prev) 2050 else: 2051 begin = self._match(TokenType.BEGIN) 2052 return_ = self._match_text_seq("RETURN") 2053 2054 if self._match(TokenType.STRING, advance=False): 2055 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2056 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2057 expression = self._parse_string() 2058 extend_props(self._parse_properties()) 2059 else: 2060 expression = self._parse_user_defined_function_expression() 2061 2062 end = self._match_text_seq("END") 2063 2064 if return_: 2065 expression = self.expression(exp.Return, this=expression) 2066 elif create_token.token_type == TokenType.INDEX: 2067 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2068 if not self._match(TokenType.ON): 2069 index = self._parse_id_var() 2070 anonymous = False 2071 else: 2072 index = None 2073 anonymous = True 2074 2075 this = self._parse_index(index=index, anonymous=anonymous) 2076 elif create_token.token_type in self.DB_CREATABLES: 2077 table_parts = self._parse_table_parts( 2078 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2079 ) 2080 2081 # exp.Properties.Location.POST_NAME 2082 self._match(TokenType.COMMA) 2083 extend_props(self._parse_properties(before=True)) 2084 2085 this = self._parse_schema(this=table_parts) 2086 2087 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2088 extend_props(self._parse_properties()) 2089 2090 has_alias = self._match(TokenType.ALIAS) 2091 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2092 # exp.Properties.Location.POST_ALIAS 2093 extend_props(self._parse_properties()) 2094 2095 if create_token.token_type == TokenType.SEQUENCE: 2096 expression = self._parse_types() 2097 props = self._parse_properties() 2098 if props: 2099 sequence_props = exp.SequenceProperties() 2100 options = [] 2101 for prop in props: 2102 if isinstance(prop, exp.SequenceProperties): 2103 for arg, value in prop.args.items(): 2104 if arg == "options": 2105 options.extend(value) 2106 else: 2107 sequence_props.set(arg, value) 2108 prop.pop() 2109 2110 if options: 2111 sequence_props.set("options", options) 2112 2113 props.append("expressions", sequence_props) 2114 extend_props(props) 2115 else: 2116 expression = self._parse_ddl_select() 2117 2118 # Some dialects also support using a table as an alias instead of a SELECT. 2119 # Here we fallback to this as an alternative. 
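# ---------------------------------------------------------------------------
# Editor's aside: an illustrative example, not part of the sqlglot source, of
# the trees that _parse_create and _parse_drop produce for ordinary DDL. The
# statements below are arbitrary and use only the default dialect.
import sqlglot
from sqlglot import exp

create = sqlglot.parse_one("CREATE OR REPLACE VIEW v AS SELECT 1 AS x")
assert isinstance(create, exp.Create)
assert create.args.get("kind") == "VIEW" and create.args.get("replace")
assert isinstance(create.expression, exp.Select)  # the DDL SELECT parsed above

drop = sqlglot.parse_one("DROP TABLE IF EXISTS db.t")
assert isinstance(drop, exp.Drop)
assert drop.args.get("exists")                    # set via _parse_exists()
print(drop.this.sql())                            # db.t
# ---------------------------------------------------------------------------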
2120 if not expression and has_alias: 2121 expression = self._try_parse(self._parse_table_parts) 2122 2123 if create_token.token_type == TokenType.TABLE: 2124 # exp.Properties.Location.POST_EXPRESSION 2125 extend_props(self._parse_properties()) 2126 2127 indexes = [] 2128 while True: 2129 index = self._parse_index() 2130 2131 # exp.Properties.Location.POST_INDEX 2132 extend_props(self._parse_properties()) 2133 if not index: 2134 break 2135 else: 2136 self._match(TokenType.COMMA) 2137 indexes.append(index) 2138 elif create_token.token_type == TokenType.VIEW: 2139 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2140 no_schema_binding = True 2141 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2142 extend_props(self._parse_properties()) 2143 2144 shallow = self._match_text_seq("SHALLOW") 2145 2146 if self._match_texts(self.CLONE_KEYWORDS): 2147 copy = self._prev.text.lower() == "copy" 2148 clone = self.expression( 2149 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2150 ) 2151 2152 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2153 return self._parse_as_command(start) 2154 2155 create_kind_text = create_token.text.upper() 2156 return self.expression( 2157 exp.Create, 2158 this=this, 2159 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2160 replace=replace, 2161 refresh=refresh, 2162 unique=unique, 2163 expression=expression, 2164 exists=exists, 2165 properties=properties, 2166 indexes=indexes, 2167 no_schema_binding=no_schema_binding, 2168 begin=begin, 2169 end=end, 2170 clone=clone, 2171 concurrently=concurrently, 2172 clustered=clustered, 2173 ) 2174 2175 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2176 seq = exp.SequenceProperties() 2177 2178 options = [] 2179 index = self._index 2180 2181 while self._curr: 2182 self._match(TokenType.COMMA) 2183 if self._match_text_seq("INCREMENT"): 2184 self._match_text_seq("BY") 2185 self._match_text_seq("=") 2186 seq.set("increment", self._parse_term()) 2187 elif self._match_text_seq("MINVALUE"): 2188 seq.set("minvalue", self._parse_term()) 2189 elif self._match_text_seq("MAXVALUE"): 2190 seq.set("maxvalue", self._parse_term()) 2191 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2192 self._match_text_seq("=") 2193 seq.set("start", self._parse_term()) 2194 elif self._match_text_seq("CACHE"): 2195 # T-SQL allows empty CACHE which is initialized dynamically 2196 seq.set("cache", self._parse_number() or True) 2197 elif self._match_text_seq("OWNED", "BY"): 2198 # "OWNED BY NONE" is the default 2199 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2200 else: 2201 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2202 if opt: 2203 options.append(opt) 2204 else: 2205 break 2206 2207 seq.set("options", options if options else None) 2208 return None if self._index == index else seq 2209 2210 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2211 # only used for teradata currently 2212 self._match(TokenType.COMMA) 2213 2214 kwargs = { 2215 "no": self._match_text_seq("NO"), 2216 "dual": self._match_text_seq("DUAL"), 2217 "before": self._match_text_seq("BEFORE"), 2218 "default": self._match_text_seq("DEFAULT"), 2219 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2220 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2221 "after": self._match_text_seq("AFTER"), 2222 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2223 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2224 } 2225 2226 if self._match_texts(self.PROPERTY_PARSERS): 2227 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2228 try: 2229 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2230 except TypeError: 2231 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2232 2233 return None 2234 2235 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2236 return self._parse_wrapped_csv(self._parse_property) 2237 2238 def _parse_property(self) -> t.Optional[exp.Expression]: 2239 if self._match_texts(self.PROPERTY_PARSERS): 2240 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2241 2242 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2243 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2244 2245 if self._match_text_seq("COMPOUND", "SORTKEY"): 2246 return self._parse_sortkey(compound=True) 2247 2248 if self._match_text_seq("SQL", "SECURITY"): 2249 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2250 2251 index = self._index 2252 2253 seq_props = self._parse_sequence_properties() 2254 if seq_props: 2255 return seq_props 2256 2257 self._retreat(index) 2258 key = self._parse_column() 2259 2260 if not self._match(TokenType.EQ): 2261 self._retreat(index) 2262 return None 2263 2264 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2265 if isinstance(key, exp.Column): 2266 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2267 2268 value = self._parse_bitwise() or self._parse_var(any_token=True) 2269 2270 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2271 if isinstance(value, exp.Column): 2272 value = exp.var(value.name) 2273 2274 return self.expression(exp.Property, this=key, value=value) 2275 2276 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2277 if self._match_text_seq("BY"): 2278 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2279 2280 self._match(TokenType.ALIAS) 2281 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2282 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2283 2284 return self.expression( 2285 exp.FileFormatProperty, 2286 this=( 2287 self.expression( 2288 exp.InputOutputFormat, 2289 input_format=input_format, 2290 output_format=output_format, 2291 ) 2292 if input_format or output_format 2293 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2294 ), 2295 hive_format=True, 2296 ) 2297 2298 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2299 field = self._parse_field() 2300 if isinstance(field, exp.Identifier) and not field.quoted: 2301 field = exp.var(field) 2302 2303 return field 2304 2305 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2306 self._match(TokenType.EQ) 2307 self._match(TokenType.ALIAS) 2308 2309 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2310 2311 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2312 properties = [] 2313 while True: 2314 if before: 2315 prop = self._parse_property_before() 2316 else: 2317 prop = self._parse_property() 2318 if not prop: 2319 break 2320 for p in ensure_list(prop): 2321 properties.append(p) 
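# ---------------------------------------------------------------------------
# Editor's aside: an illustrative example, not part of the sqlglot source.
# Property keys that no specialized PROPERTY_PARSERS entry claims fall through
# to the generic key = value branch of _parse_property and come back as plain
# exp.Property nodes. The property names below are made up for the demo.
import sqlglot
from sqlglot import exp

ddl = "CREATE TABLE t (x INT) WITH (my_prop = 1, team = 'data-eng')"
props = sqlglot.parse_one(ddl).args["properties"]
assert all(isinstance(p, exp.Property) for p in props.expressions)
print([(p.this.sql(), p.args["value"].sql()) for p in props.expressions])
# ---------------------------------------------------------------------------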
2322 2323 if properties: 2324 return self.expression(exp.Properties, expressions=properties) 2325 2326 return None 2327 2328 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2329 return self.expression( 2330 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2331 ) 2332 2333 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2334 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2335 security_specifier = self._prev.text.upper() 2336 return self.expression(exp.SecurityProperty, this=security_specifier) 2337 return None 2338 2339 def _parse_settings_property(self) -> exp.SettingsProperty: 2340 return self.expression( 2341 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2342 ) 2343 2344 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2345 if self._index >= 2: 2346 pre_volatile_token = self._tokens[self._index - 2] 2347 else: 2348 pre_volatile_token = None 2349 2350 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2351 return exp.VolatileProperty() 2352 2353 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2354 2355 def _parse_retention_period(self) -> exp.Var: 2356 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2357 number = self._parse_number() 2358 number_str = f"{number} " if number else "" 2359 unit = self._parse_var(any_token=True) 2360 return exp.var(f"{number_str}{unit}") 2361 2362 def _parse_system_versioning_property( 2363 self, with_: bool = False 2364 ) -> exp.WithSystemVersioningProperty: 2365 self._match(TokenType.EQ) 2366 prop = self.expression( 2367 exp.WithSystemVersioningProperty, 2368 **{ # type: ignore 2369 "on": True, 2370 "with": with_, 2371 }, 2372 ) 2373 2374 if self._match_text_seq("OFF"): 2375 prop.set("on", False) 2376 return prop 2377 2378 self._match(TokenType.ON) 2379 if self._match(TokenType.L_PAREN): 2380 while self._curr and not self._match(TokenType.R_PAREN): 2381 if self._match_text_seq("HISTORY_TABLE", "="): 2382 prop.set("this", self._parse_table_parts()) 2383 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2384 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2385 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2386 prop.set("retention_period", self._parse_retention_period()) 2387 2388 self._match(TokenType.COMMA) 2389 2390 return prop 2391 2392 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2393 self._match(TokenType.EQ) 2394 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2395 prop = self.expression(exp.DataDeletionProperty, on=on) 2396 2397 if self._match(TokenType.L_PAREN): 2398 while self._curr and not self._match(TokenType.R_PAREN): 2399 if self._match_text_seq("FILTER_COLUMN", "="): 2400 prop.set("filter_column", self._parse_column()) 2401 elif self._match_text_seq("RETENTION_PERIOD", "="): 2402 prop.set("retention_period", self._parse_retention_period()) 2403 2404 self._match(TokenType.COMMA) 2405 2406 return prop 2407 2408 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2409 kind = "HASH" 2410 expressions: t.Optional[t.List[exp.Expression]] = None 2411 if self._match_text_seq("BY", "HASH"): 2412 expressions = self._parse_wrapped_csv(self._parse_id_var) 2413 elif self._match_text_seq("BY", "RANDOM"): 2414 kind = "RANDOM" 2415 2416 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2417 
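# ---------------------------------------------------------------------------
# Editor's aside: an illustrative example, not part of the sqlglot source,
# assuming a dialect such as StarRocks/Doris routes its DISTRIBUTED BY clause
# through _parse_distributed_property below; the table definition is arbitrary.
import sqlglot
from sqlglot import exp

ddl = "CREATE TABLE t (a INT) DISTRIBUTED BY HASH (a) BUCKETS 10"
dist = sqlglot.parse_one(ddl, read="starrocks").find(exp.DistributedByProperty)
assert dist and dist.args.get("kind") == "HASH"
print([e.sql() for e in dist.expressions], dist.args.get("buckets"))
# ---------------------------------------------------------------------------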
buckets: t.Optional[exp.Expression] = None 2418 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2419 buckets = self._parse_number() 2420 2421 return self.expression( 2422 exp.DistributedByProperty, 2423 expressions=expressions, 2424 kind=kind, 2425 buckets=buckets, 2426 order=self._parse_order(), 2427 ) 2428 2429 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2430 self._match_text_seq("KEY") 2431 expressions = self._parse_wrapped_id_vars() 2432 return self.expression(expr_type, expressions=expressions) 2433 2434 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2435 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2436 prop = self._parse_system_versioning_property(with_=True) 2437 self._match_r_paren() 2438 return prop 2439 2440 if self._match(TokenType.L_PAREN, advance=False): 2441 return self._parse_wrapped_properties() 2442 2443 if self._match_text_seq("JOURNAL"): 2444 return self._parse_withjournaltable() 2445 2446 if self._match_texts(self.VIEW_ATTRIBUTES): 2447 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2448 2449 if self._match_text_seq("DATA"): 2450 return self._parse_withdata(no=False) 2451 elif self._match_text_seq("NO", "DATA"): 2452 return self._parse_withdata(no=True) 2453 2454 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2455 return self._parse_serde_properties(with_=True) 2456 2457 if self._match(TokenType.SCHEMA): 2458 return self.expression( 2459 exp.WithSchemaBindingProperty, 2460 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2461 ) 2462 2463 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2464 return self.expression( 2465 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2466 ) 2467 2468 if not self._next: 2469 return None 2470 2471 return self._parse_withisolatedloading() 2472 2473 def _parse_procedure_option(self) -> exp.Expression | None: 2474 if self._match_text_seq("EXECUTE", "AS"): 2475 return self.expression( 2476 exp.ExecuteAsProperty, 2477 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2478 or self._parse_string(), 2479 ) 2480 2481 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2482 2483 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2484 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2485 self._match(TokenType.EQ) 2486 2487 user = self._parse_id_var() 2488 self._match(TokenType.PARAMETER) 2489 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2490 2491 if not user or not host: 2492 return None 2493 2494 return exp.DefinerProperty(this=f"{user}@{host}") 2495 2496 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2497 self._match(TokenType.TABLE) 2498 self._match(TokenType.EQ) 2499 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2500 2501 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2502 return self.expression(exp.LogProperty, no=no) 2503 2504 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2505 return self.expression(exp.JournalProperty, **kwargs) 2506 2507 def _parse_checksum(self) -> exp.ChecksumProperty: 2508 self._match(TokenType.EQ) 2509 2510 on = None 2511 if self._match(TokenType.ON): 2512 on = True 2513 elif self._match_text_seq("OFF"): 2514 on = False 2515 2516 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2517 2518 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2519 return self.expression( 2520 exp.Cluster, 2521 expressions=( 2522 self._parse_wrapped_csv(self._parse_ordered) 2523 if wrapped 2524 else self._parse_csv(self._parse_ordered) 2525 ), 2526 ) 2527 2528 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2529 self._match_text_seq("BY") 2530 2531 self._match_l_paren() 2532 expressions = self._parse_csv(self._parse_column) 2533 self._match_r_paren() 2534 2535 if self._match_text_seq("SORTED", "BY"): 2536 self._match_l_paren() 2537 sorted_by = self._parse_csv(self._parse_ordered) 2538 self._match_r_paren() 2539 else: 2540 sorted_by = None 2541 2542 self._match(TokenType.INTO) 2543 buckets = self._parse_number() 2544 self._match_text_seq("BUCKETS") 2545 2546 return self.expression( 2547 exp.ClusteredByProperty, 2548 expressions=expressions, 2549 sorted_by=sorted_by, 2550 buckets=buckets, 2551 ) 2552 2553 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2554 if not self._match_text_seq("GRANTS"): 2555 self._retreat(self._index - 1) 2556 return None 2557 2558 return self.expression(exp.CopyGrantsProperty) 2559 2560 def _parse_freespace(self) -> exp.FreespaceProperty: 2561 self._match(TokenType.EQ) 2562 return self.expression( 2563 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2564 ) 2565 2566 def _parse_mergeblockratio( 2567 self, no: bool = False, default: bool = False 2568 ) -> exp.MergeBlockRatioProperty: 2569 if self._match(TokenType.EQ): 2570 return self.expression( 2571 exp.MergeBlockRatioProperty, 2572 this=self._parse_number(), 2573 percent=self._match(TokenType.PERCENT), 2574 ) 2575 2576 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2577 2578 def _parse_datablocksize( 2579 self, 2580 default: t.Optional[bool] = None, 2581 minimum: t.Optional[bool] = None, 2582 maximum: t.Optional[bool] = None, 2583 ) -> exp.DataBlocksizeProperty: 2584 self._match(TokenType.EQ) 2585 size = self._parse_number() 2586 2587 units = None 2588 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2589 units = self._prev.text 2590 2591 return self.expression( 2592 exp.DataBlocksizeProperty, 2593 size=size, 2594 units=units, 2595 default=default, 2596 minimum=minimum, 2597 maximum=maximum, 2598 ) 2599 2600 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2601 self._match(TokenType.EQ) 2602 always = self._match_text_seq("ALWAYS") 2603 manual = self._match_text_seq("MANUAL") 2604 never = self._match_text_seq("NEVER") 2605 default = self._match_text_seq("DEFAULT") 2606 2607 autotemp = None 2608 if self._match_text_seq("AUTOTEMP"): 2609 autotemp = self._parse_schema() 2610 2611 return self.expression( 2612 exp.BlockCompressionProperty, 2613 always=always, 2614 manual=manual, 2615 never=never, 2616 default=default, 2617 autotemp=autotemp, 2618 ) 2619 2620 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2621 index = self._index 2622 no = self._match_text_seq("NO") 2623 concurrent = self._match_text_seq("CONCURRENT") 2624 2625 if not self._match_text_seq("ISOLATED", "LOADING"): 2626 self._retreat(index) 2627 return None 2628 2629 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2630 return self.expression( 2631 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2632 ) 2633 2634 def _parse_locking(self) -> exp.LockingProperty: 2635 if self._match(TokenType.TABLE): 2636 kind = "TABLE" 2637 elif 
self._match(TokenType.VIEW): 2638 kind = "VIEW" 2639 elif self._match(TokenType.ROW): 2640 kind = "ROW" 2641 elif self._match_text_seq("DATABASE"): 2642 kind = "DATABASE" 2643 else: 2644 kind = None 2645 2646 if kind in ("DATABASE", "TABLE", "VIEW"): 2647 this = self._parse_table_parts() 2648 else: 2649 this = None 2650 2651 if self._match(TokenType.FOR): 2652 for_or_in = "FOR" 2653 elif self._match(TokenType.IN): 2654 for_or_in = "IN" 2655 else: 2656 for_or_in = None 2657 2658 if self._match_text_seq("ACCESS"): 2659 lock_type = "ACCESS" 2660 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2661 lock_type = "EXCLUSIVE" 2662 elif self._match_text_seq("SHARE"): 2663 lock_type = "SHARE" 2664 elif self._match_text_seq("READ"): 2665 lock_type = "READ" 2666 elif self._match_text_seq("WRITE"): 2667 lock_type = "WRITE" 2668 elif self._match_text_seq("CHECKSUM"): 2669 lock_type = "CHECKSUM" 2670 else: 2671 lock_type = None 2672 2673 override = self._match_text_seq("OVERRIDE") 2674 2675 return self.expression( 2676 exp.LockingProperty, 2677 this=this, 2678 kind=kind, 2679 for_or_in=for_or_in, 2680 lock_type=lock_type, 2681 override=override, 2682 ) 2683 2684 def _parse_partition_by(self) -> t.List[exp.Expression]: 2685 if self._match(TokenType.PARTITION_BY): 2686 return self._parse_csv(self._parse_assignment) 2687 return [] 2688 2689 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2690 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2691 if self._match_text_seq("MINVALUE"): 2692 return exp.var("MINVALUE") 2693 if self._match_text_seq("MAXVALUE"): 2694 return exp.var("MAXVALUE") 2695 return self._parse_bitwise() 2696 2697 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2698 expression = None 2699 from_expressions = None 2700 to_expressions = None 2701 2702 if self._match(TokenType.IN): 2703 this = self._parse_wrapped_csv(self._parse_bitwise) 2704 elif self._match(TokenType.FROM): 2705 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2706 self._match_text_seq("TO") 2707 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2708 elif self._match_text_seq("WITH", "(", "MODULUS"): 2709 this = self._parse_number() 2710 self._match_text_seq(",", "REMAINDER") 2711 expression = self._parse_number() 2712 self._match_r_paren() 2713 else: 2714 self.raise_error("Failed to parse partition bound spec.") 2715 2716 return self.expression( 2717 exp.PartitionBoundSpec, 2718 this=this, 2719 expression=expression, 2720 from_expressions=from_expressions, 2721 to_expressions=to_expressions, 2722 ) 2723 2724 # https://www.postgresql.org/docs/current/sql-createtable.html 2725 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2726 if not self._match_text_seq("OF"): 2727 self._retreat(self._index - 1) 2728 return None 2729 2730 this = self._parse_table(schema=True) 2731 2732 if self._match(TokenType.DEFAULT): 2733 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2734 elif self._match_text_seq("FOR", "VALUES"): 2735 expression = self._parse_partition_bound_spec() 2736 else: 2737 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2738 2739 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2740 2741 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2742 self._match(TokenType.EQ) 2743 return self.expression( 2744 exp.PartitionedByProperty, 2745 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2746 ) 2747 2748 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2749 if self._match_text_seq("AND", "STATISTICS"): 2750 statistics = True 2751 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2752 statistics = False 2753 else: 2754 statistics = None 2755 2756 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2757 2758 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2759 if self._match_text_seq("SQL"): 2760 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2761 return None 2762 2763 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2764 if self._match_text_seq("SQL", "DATA"): 2765 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2766 return None 2767 2768 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2769 if self._match_text_seq("PRIMARY", "INDEX"): 2770 return exp.NoPrimaryIndexProperty() 2771 if self._match_text_seq("SQL"): 2772 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2773 return None 2774 2775 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2776 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2777 return exp.OnCommitProperty() 2778 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2779 return exp.OnCommitProperty(delete=True) 2780 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2781 2782 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2783 if self._match_text_seq("SQL", "DATA"): 2784 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2785 return None 2786 2787 def _parse_distkey(self) -> exp.DistKeyProperty: 2788 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2789 2790 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2791 table = self._parse_table(schema=True) 2792 2793 options = [] 2794 while self._match_texts(("INCLUDING", "EXCLUDING")): 2795 this = self._prev.text.upper() 2796 2797 id_var = self._parse_id_var() 2798 if not id_var: 2799 return None 2800 2801 options.append( 2802 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2803 ) 2804 2805 return self.expression(exp.LikeProperty, this=table, expressions=options) 2806 2807 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2808 return self.expression( 2809 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2810 ) 2811 2812 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2813 self._match(TokenType.EQ) 2814 return self.expression( 2815 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2816 ) 2817 2818 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2819 self._match_text_seq("WITH", "CONNECTION") 2820 return self.expression( 2821 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2822 ) 2823 2824 def _parse_returns(self) -> exp.ReturnsProperty: 2825 value: t.Optional[exp.Expression] 2826 null = None 2827 is_table = self._match(TokenType.TABLE) 2828 2829 if is_table: 2830 if self._match(TokenType.LT): 2831 value = self.expression( 2832 exp.Schema, 2833 this="TABLE", 2834 expressions=self._parse_csv(self._parse_struct_types), 2835 ) 2836 if not self._match(TokenType.GT): 2837 self.raise_error("Expecting >") 2838 else: 2839 value = self._parse_schema(exp.var("TABLE")) 2840 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
2841 null = True 2842 value = None 2843 else: 2844 value = self._parse_types() 2845 2846 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2847 2848 def _parse_describe(self) -> exp.Describe: 2849 kind = self._match_set(self.CREATABLES) and self._prev.text 2850 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2851 if self._match(TokenType.DOT): 2852 style = None 2853 self._retreat(self._index - 2) 2854 2855 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2856 2857 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2858 this = self._parse_statement() 2859 else: 2860 this = self._parse_table(schema=True) 2861 2862 properties = self._parse_properties() 2863 expressions = properties.expressions if properties else None 2864 partition = self._parse_partition() 2865 return self.expression( 2866 exp.Describe, 2867 this=this, 2868 style=style, 2869 kind=kind, 2870 expressions=expressions, 2871 partition=partition, 2872 format=format, 2873 ) 2874 2875 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2876 kind = self._prev.text.upper() 2877 expressions = [] 2878 2879 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2880 if self._match(TokenType.WHEN): 2881 expression = self._parse_disjunction() 2882 self._match(TokenType.THEN) 2883 else: 2884 expression = None 2885 2886 else_ = self._match(TokenType.ELSE) 2887 2888 if not self._match(TokenType.INTO): 2889 return None 2890 2891 return self.expression( 2892 exp.ConditionalInsert, 2893 this=self.expression( 2894 exp.Insert, 2895 this=self._parse_table(schema=True), 2896 expression=self._parse_derived_table_values(), 2897 ), 2898 expression=expression, 2899 else_=else_, 2900 ) 2901 2902 expression = parse_conditional_insert() 2903 while expression is not None: 2904 expressions.append(expression) 2905 expression = parse_conditional_insert() 2906 2907 return self.expression( 2908 exp.MultitableInserts, 2909 kind=kind, 2910 comments=comments, 2911 expressions=expressions, 2912 source=self._parse_table(), 2913 ) 2914 2915 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2916 comments = [] 2917 hint = self._parse_hint() 2918 overwrite = self._match(TokenType.OVERWRITE) 2919 ignore = self._match(TokenType.IGNORE) 2920 local = self._match_text_seq("LOCAL") 2921 alternative = None 2922 is_function = None 2923 2924 if self._match_text_seq("DIRECTORY"): 2925 this: t.Optional[exp.Expression] = self.expression( 2926 exp.Directory, 2927 this=self._parse_var_or_string(), 2928 local=local, 2929 row_format=self._parse_row_format(match_row=True), 2930 ) 2931 else: 2932 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2933 comments += ensure_list(self._prev_comments) 2934 return self._parse_multitable_inserts(comments) 2935 2936 if self._match(TokenType.OR): 2937 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2938 2939 self._match(TokenType.INTO) 2940 comments += ensure_list(self._prev_comments) 2941 self._match(TokenType.TABLE) 2942 is_function = self._match(TokenType.FUNCTION) 2943 2944 this = ( 2945 self._parse_table(schema=True, parse_partition=True) 2946 if not is_function 2947 else self._parse_function() 2948 ) 2949 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2950 this.set("alias", self._parse_table_alias()) 2951 2952 returning = self._parse_returning() 2953 2954 return self.expression( 2955 
exp.Insert, 2956 comments=comments, 2957 hint=hint, 2958 is_function=is_function, 2959 this=this, 2960 stored=self._match_text_seq("STORED") and self._parse_stored(), 2961 by_name=self._match_text_seq("BY", "NAME"), 2962 exists=self._parse_exists(), 2963 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2964 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2965 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2966 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2967 conflict=self._parse_on_conflict(), 2968 returning=returning or self._parse_returning(), 2969 overwrite=overwrite, 2970 alternative=alternative, 2971 ignore=ignore, 2972 source=self._match(TokenType.TABLE) and self._parse_table(), 2973 ) 2974 2975 def _parse_kill(self) -> exp.Kill: 2976 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2977 2978 return self.expression( 2979 exp.Kill, 2980 this=self._parse_primary(), 2981 kind=kind, 2982 ) 2983 2984 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2985 conflict = self._match_text_seq("ON", "CONFLICT") 2986 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2987 2988 if not conflict and not duplicate: 2989 return None 2990 2991 conflict_keys = None 2992 constraint = None 2993 2994 if conflict: 2995 if self._match_text_seq("ON", "CONSTRAINT"): 2996 constraint = self._parse_id_var() 2997 elif self._match(TokenType.L_PAREN): 2998 conflict_keys = self._parse_csv(self._parse_id_var) 2999 self._match_r_paren() 3000 3001 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3002 if self._prev.token_type == TokenType.UPDATE: 3003 self._match(TokenType.SET) 3004 expressions = self._parse_csv(self._parse_equality) 3005 else: 3006 expressions = None 3007 3008 return self.expression( 3009 exp.OnConflict, 3010 duplicate=duplicate, 3011 expressions=expressions, 3012 action=action, 3013 conflict_keys=conflict_keys, 3014 constraint=constraint, 3015 where=self._parse_where(), 3016 ) 3017 3018 def _parse_returning(self) -> t.Optional[exp.Returning]: 3019 if not self._match(TokenType.RETURNING): 3020 return None 3021 return self.expression( 3022 exp.Returning, 3023 expressions=self._parse_csv(self._parse_expression), 3024 into=self._match(TokenType.INTO) and self._parse_table_part(), 3025 ) 3026 3027 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3028 if not self._match(TokenType.FORMAT): 3029 return None 3030 return self._parse_row_format() 3031 3032 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3033 index = self._index 3034 with_ = with_ or self._match_text_seq("WITH") 3035 3036 if not self._match(TokenType.SERDE_PROPERTIES): 3037 self._retreat(index) 3038 return None 3039 return self.expression( 3040 exp.SerdeProperties, 3041 **{ # type: ignore 3042 "expressions": self._parse_wrapped_properties(), 3043 "with": with_, 3044 }, 3045 ) 3046 3047 def _parse_row_format( 3048 self, match_row: bool = False 3049 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3050 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3051 return None 3052 3053 if self._match_text_seq("SERDE"): 3054 this = self._parse_string() 3055 3056 serde_properties = self._parse_serde_properties() 3057 3058 return self.expression( 3059 exp.RowFormatSerdeProperty, this=this, 
serde_properties=serde_properties 3060 ) 3061 3062 self._match_text_seq("DELIMITED") 3063 3064 kwargs = {} 3065 3066 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3067 kwargs["fields"] = self._parse_string() 3068 if self._match_text_seq("ESCAPED", "BY"): 3069 kwargs["escaped"] = self._parse_string() 3070 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3071 kwargs["collection_items"] = self._parse_string() 3072 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3073 kwargs["map_keys"] = self._parse_string() 3074 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3075 kwargs["lines"] = self._parse_string() 3076 if self._match_text_seq("NULL", "DEFINED", "AS"): 3077 kwargs["null"] = self._parse_string() 3078 3079 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3080 3081 def _parse_load(self) -> exp.LoadData | exp.Command: 3082 if self._match_text_seq("DATA"): 3083 local = self._match_text_seq("LOCAL") 3084 self._match_text_seq("INPATH") 3085 inpath = self._parse_string() 3086 overwrite = self._match(TokenType.OVERWRITE) 3087 self._match_pair(TokenType.INTO, TokenType.TABLE) 3088 3089 return self.expression( 3090 exp.LoadData, 3091 this=self._parse_table(schema=True), 3092 local=local, 3093 overwrite=overwrite, 3094 inpath=inpath, 3095 partition=self._parse_partition(), 3096 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3097 serde=self._match_text_seq("SERDE") and self._parse_string(), 3098 ) 3099 return self._parse_as_command(self._prev) 3100 3101 def _parse_delete(self) -> exp.Delete: 3102 # This handles MySQL's "Multiple-Table Syntax" 3103 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3104 tables = None 3105 if not self._match(TokenType.FROM, advance=False): 3106 tables = self._parse_csv(self._parse_table) or None 3107 3108 returning = self._parse_returning() 3109 3110 return self.expression( 3111 exp.Delete, 3112 tables=tables, 3113 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3114 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3115 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3116 where=self._parse_where(), 3117 returning=returning or self._parse_returning(), 3118 limit=self._parse_limit(), 3119 ) 3120 3121 def _parse_update(self) -> exp.Update: 3122 kwargs: t.Dict[str, t.Any] = { 3123 "this": self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS), 3124 } 3125 while self._curr: 3126 if self._match(TokenType.SET): 3127 kwargs["expressions"] = self._parse_csv(self._parse_equality) 3128 elif self._match(TokenType.RETURNING, advance=False): 3129 kwargs["returning"] = self._parse_returning() 3130 elif self._match(TokenType.FROM, advance=False): 3131 kwargs["from"] = self._parse_from(joins=True) 3132 elif self._match(TokenType.WHERE, advance=False): 3133 kwargs["where"] = self._parse_where() 3134 elif self._match(TokenType.ORDER_BY, advance=False): 3135 kwargs["order"] = self._parse_order() 3136 elif self._match(TokenType.LIMIT, advance=False): 3137 kwargs["limit"] = self._parse_limit() 3138 else: 3139 break 3140 3141 return self.expression(exp.Update, **kwargs) 3142 3143 def _parse_use(self) -> exp.Use: 3144 return self.expression( 3145 exp.Use, 3146 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3147 this=self._parse_table(schema=False), 3148 ) 3149 3150 def _parse_uncache(self) -> exp.Uncache: 3151 if not self._match(TokenType.TABLE): 3152 self.raise_error("Expecting TABLE after 
UNCACHE") 3153 3154 return self.expression( 3155 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3156 ) 3157 3158 def _parse_cache(self) -> exp.Cache: 3159 lazy = self._match_text_seq("LAZY") 3160 self._match(TokenType.TABLE) 3161 table = self._parse_table(schema=True) 3162 3163 options = [] 3164 if self._match_text_seq("OPTIONS"): 3165 self._match_l_paren() 3166 k = self._parse_string() 3167 self._match(TokenType.EQ) 3168 v = self._parse_string() 3169 options = [k, v] 3170 self._match_r_paren() 3171 3172 self._match(TokenType.ALIAS) 3173 return self.expression( 3174 exp.Cache, 3175 this=table, 3176 lazy=lazy, 3177 options=options, 3178 expression=self._parse_select(nested=True), 3179 ) 3180 3181 def _parse_partition(self) -> t.Optional[exp.Partition]: 3182 if not self._match_texts(self.PARTITION_KEYWORDS): 3183 return None 3184 3185 return self.expression( 3186 exp.Partition, 3187 subpartition=self._prev.text.upper() == "SUBPARTITION", 3188 expressions=self._parse_wrapped_csv(self._parse_assignment), 3189 ) 3190 3191 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3192 def _parse_value_expression() -> t.Optional[exp.Expression]: 3193 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3194 return exp.var(self._prev.text.upper()) 3195 return self._parse_expression() 3196 3197 if self._match(TokenType.L_PAREN): 3198 expressions = self._parse_csv(_parse_value_expression) 3199 self._match_r_paren() 3200 return self.expression(exp.Tuple, expressions=expressions) 3201 3202 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3203 expression = self._parse_expression() 3204 if expression: 3205 return self.expression(exp.Tuple, expressions=[expression]) 3206 return None 3207 3208 def _parse_projections(self) -> t.List[exp.Expression]: 3209 return self._parse_expressions() 3210 3211 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3212 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3213 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3214 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3215 ) 3216 elif self._match(TokenType.FROM): 3217 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3218 # Support parentheses for duckdb FROM-first syntax 3219 select = self._parse_select(from_=from_) 3220 if select: 3221 if not select.args.get("from"): 3222 select.set("from", from_) 3223 this = select 3224 else: 3225 this = exp.select("*").from_(t.cast(exp.From, from_)) 3226 else: 3227 this = ( 3228 self._parse_table(consume_pipe=True) 3229 if table 3230 else self._parse_select(nested=True, parse_set_operation=False) 3231 ) 3232 3233 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3234 # in case a modifier (e.g. 
join) is following 3235 if table and isinstance(this, exp.Values) and this.alias: 3236 alias = this.args["alias"].pop() 3237 this = exp.Table(this=this, alias=alias) 3238 3239 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3240 3241 return this 3242 3243 def _parse_select( 3244 self, 3245 nested: bool = False, 3246 table: bool = False, 3247 parse_subquery_alias: bool = True, 3248 parse_set_operation: bool = True, 3249 consume_pipe: bool = True, 3250 from_: t.Optional[exp.From] = None, 3251 ) -> t.Optional[exp.Expression]: 3252 query = self._parse_select_query( 3253 nested=nested, 3254 table=table, 3255 parse_subquery_alias=parse_subquery_alias, 3256 parse_set_operation=parse_set_operation, 3257 ) 3258 3259 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3260 if not query and from_: 3261 query = exp.select("*").from_(from_) 3262 if isinstance(query, exp.Query): 3263 query = self._parse_pipe_syntax_query(query) 3264 query = query.subquery(copy=False) if query and table else query 3265 3266 return query 3267 3268 def _parse_select_query( 3269 self, 3270 nested: bool = False, 3271 table: bool = False, 3272 parse_subquery_alias: bool = True, 3273 parse_set_operation: bool = True, 3274 ) -> t.Optional[exp.Expression]: 3275 cte = self._parse_with() 3276 3277 if cte: 3278 this = self._parse_statement() 3279 3280 if not this: 3281 self.raise_error("Failed to parse any statement following CTE") 3282 return cte 3283 3284 if "with" in this.arg_types: 3285 this.set("with", cte) 3286 else: 3287 self.raise_error(f"{this.key} does not support CTE") 3288 this = cte 3289 3290 return this 3291 3292 # duckdb supports leading with FROM x 3293 from_ = ( 3294 self._parse_from(consume_pipe=True) 3295 if self._match(TokenType.FROM, advance=False) 3296 else None 3297 ) 3298 3299 if self._match(TokenType.SELECT): 3300 comments = self._prev_comments 3301 3302 hint = self._parse_hint() 3303 3304 if self._next and not self._next.token_type == TokenType.DOT: 3305 all_ = self._match(TokenType.ALL) 3306 distinct = self._match_set(self.DISTINCT_TOKENS) 3307 else: 3308 all_, distinct = None, None 3309 3310 kind = ( 3311 self._match(TokenType.ALIAS) 3312 and self._match_texts(("STRUCT", "VALUE")) 3313 and self._prev.text.upper() 3314 ) 3315 3316 if distinct: 3317 distinct = self.expression( 3318 exp.Distinct, 3319 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3320 ) 3321 3322 if all_ and distinct: 3323 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3324 3325 operation_modifiers = [] 3326 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3327 operation_modifiers.append(exp.var(self._prev.text.upper())) 3328 3329 limit = self._parse_limit(top=True) 3330 projections = self._parse_projections() 3331 3332 this = self.expression( 3333 exp.Select, 3334 kind=kind, 3335 hint=hint, 3336 distinct=distinct, 3337 expressions=projections, 3338 limit=limit, 3339 operation_modifiers=operation_modifiers or None, 3340 ) 3341 this.comments = comments 3342 3343 into = self._parse_into() 3344 if into: 3345 this.set("into", into) 3346 3347 if not from_: 3348 from_ = self._parse_from() 3349 3350 if from_: 3351 this.set("from", from_) 3352 3353 this = self._parse_query_modifiers(this) 3354 elif (table or nested) and self._match(TokenType.L_PAREN): 3355 this = self._parse_wrapped_select(table=table) 3356 3357 # We return early here so that the UNION isn't attached to the subquery by the 3358 # following call to _parse_set_operations, but 
instead becomes the parent node 3359 self._match_r_paren() 3360 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3361 elif self._match(TokenType.VALUES, advance=False): 3362 this = self._parse_derived_table_values() 3363 elif from_: 3364 this = exp.select("*").from_(from_.this, copy=False) 3365 elif self._match(TokenType.SUMMARIZE): 3366 table = self._match(TokenType.TABLE) 3367 this = self._parse_select() or self._parse_string() or self._parse_table() 3368 return self.expression(exp.Summarize, this=this, table=table) 3369 elif self._match(TokenType.DESCRIBE): 3370 this = self._parse_describe() 3371 elif self._match_text_seq("STREAM"): 3372 this = self._parse_function() 3373 if this: 3374 this = self.expression(exp.Stream, this=this) 3375 else: 3376 self._retreat(self._index - 1) 3377 else: 3378 this = None 3379 3380 return self._parse_set_operations(this) if parse_set_operation else this 3381 3382 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3383 self._match_text_seq("SEARCH") 3384 3385 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3386 3387 if not kind: 3388 return None 3389 3390 self._match_text_seq("FIRST", "BY") 3391 3392 return self.expression( 3393 exp.RecursiveWithSearch, 3394 kind=kind, 3395 this=self._parse_id_var(), 3396 expression=self._match_text_seq("SET") and self._parse_id_var(), 3397 using=self._match_text_seq("USING") and self._parse_id_var(), 3398 ) 3399 3400 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3401 if not skip_with_token and not self._match(TokenType.WITH): 3402 return None 3403 3404 comments = self._prev_comments 3405 recursive = self._match(TokenType.RECURSIVE) 3406 3407 last_comments = None 3408 expressions = [] 3409 while True: 3410 cte = self._parse_cte() 3411 if isinstance(cte, exp.CTE): 3412 expressions.append(cte) 3413 if last_comments: 3414 cte.add_comments(last_comments) 3415 3416 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3417 break 3418 else: 3419 self._match(TokenType.WITH) 3420 3421 last_comments = self._prev_comments 3422 3423 return self.expression( 3424 exp.With, 3425 comments=comments, 3426 expressions=expressions, 3427 recursive=recursive, 3428 search=self._parse_recursive_with_search(), 3429 ) 3430 3431 def _parse_cte(self) -> t.Optional[exp.CTE]: 3432 index = self._index 3433 3434 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3435 if not alias or not alias.this: 3436 self.raise_error("Expected CTE to have alias") 3437 3438 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3439 self._retreat(index) 3440 return None 3441 3442 comments = self._prev_comments 3443 3444 if self._match_text_seq("NOT", "MATERIALIZED"): 3445 materialized = False 3446 elif self._match_text_seq("MATERIALIZED"): 3447 materialized = True 3448 else: 3449 materialized = None 3450 3451 cte = self.expression( 3452 exp.CTE, 3453 this=self._parse_wrapped(self._parse_statement), 3454 alias=alias, 3455 materialized=materialized, 3456 comments=comments, 3457 ) 3458 3459 values = cte.this 3460 if isinstance(values, exp.Values): 3461 if values.alias: 3462 cte.set("this", exp.select("*").from_(values)) 3463 else: 3464 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3465 3466 return cte 3467 3468 def _parse_table_alias( 3469 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3470 ) -> t.Optional[exp.TableAlias]: 3471 # In some dialects, LIMIT and OFFSET 
can act as both identifiers and keywords (clauses) 3472 # so this section tries to parse the clause version and if it fails, it treats the token 3473 # as an identifier (alias) 3474 if self._can_parse_limit_or_offset(): 3475 return None 3476 3477 any_token = self._match(TokenType.ALIAS) 3478 alias = ( 3479 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3480 or self._parse_string_as_identifier() 3481 ) 3482 3483 index = self._index 3484 if self._match(TokenType.L_PAREN): 3485 columns = self._parse_csv(self._parse_function_parameter) 3486 self._match_r_paren() if columns else self._retreat(index) 3487 else: 3488 columns = None 3489 3490 if not alias and not columns: 3491 return None 3492 3493 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3494 3495 # We bubble up comments from the Identifier to the TableAlias 3496 if isinstance(alias, exp.Identifier): 3497 table_alias.add_comments(alias.pop_comments()) 3498 3499 return table_alias 3500 3501 def _parse_subquery( 3502 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3503 ) -> t.Optional[exp.Subquery]: 3504 if not this: 3505 return None 3506 3507 return self.expression( 3508 exp.Subquery, 3509 this=this, 3510 pivots=self._parse_pivots(), 3511 alias=self._parse_table_alias() if parse_alias else None, 3512 sample=self._parse_table_sample(), 3513 ) 3514 3515 def _implicit_unnests_to_explicit(self, this: E) -> E: 3516 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3517 3518 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3519 for i, join in enumerate(this.args.get("joins") or []): 3520 table = join.this 3521 normalized_table = table.copy() 3522 normalized_table.meta["maybe_column"] = True 3523 normalized_table = _norm(normalized_table, dialect=self.dialect) 3524 3525 if isinstance(table, exp.Table) and not join.args.get("on"): 3526 if normalized_table.parts[0].name in refs: 3527 table_as_column = table.to_column() 3528 unnest = exp.Unnest(expressions=[table_as_column]) 3529 3530 # Table.to_column creates a parent Alias node that we want to convert to 3531 # a TableAlias and attach to the Unnest, so it matches the parser's output 3532 if isinstance(table.args.get("alias"), exp.TableAlias): 3533 table_as_column.replace(table_as_column.this) 3534 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3535 3536 table.replace(unnest) 3537 3538 refs.add(normalized_table.alias_or_name) 3539 3540 return this 3541 3542 def _parse_query_modifiers( 3543 self, this: t.Optional[exp.Expression] 3544 ) -> t.Optional[exp.Expression]: 3545 if isinstance(this, self.MODIFIABLES): 3546 for join in self._parse_joins(): 3547 this.append("joins", join) 3548 for lateral in iter(self._parse_lateral, None): 3549 this.append("laterals", lateral) 3550 3551 while True: 3552 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3553 modifier_token = self._curr 3554 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3555 key, expression = parser(self) 3556 3557 if expression: 3558 if this.args.get(key): 3559 self.raise_error( 3560 f"Found multiple '{modifier_token.text.upper()}' clauses", 3561 token=modifier_token, 3562 ) 3563 3564 this.set(key, expression) 3565 if key == "limit": 3566 offset = expression.args.pop("offset", None) 3567 3568 if offset: 3569 offset = exp.Offset(expression=offset) 3570 this.set("offset", offset) 3571 3572 limit_by_expressions = expression.expressions 3573 
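                        # Move the ClickHouse-style LIMIT ... BY expressions from the Limit
                        # onto the exp.Offset that was just attached to the query above.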
expression.set("expressions", None) 3574 offset.set("expressions", limit_by_expressions) 3575 continue 3576 break 3577 3578 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3579 this = self._implicit_unnests_to_explicit(this) 3580 3581 return this 3582 3583 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3584 start = self._curr 3585 while self._curr: 3586 self._advance() 3587 3588 end = self._tokens[self._index - 1] 3589 return exp.Hint(expressions=[self._find_sql(start, end)]) 3590 3591 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3592 return self._parse_function_call() 3593 3594 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3595 start_index = self._index 3596 should_fallback_to_string = False 3597 3598 hints = [] 3599 try: 3600 for hint in iter( 3601 lambda: self._parse_csv( 3602 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3603 ), 3604 [], 3605 ): 3606 hints.extend(hint) 3607 except ParseError: 3608 should_fallback_to_string = True 3609 3610 if should_fallback_to_string or self._curr: 3611 self._retreat(start_index) 3612 return self._parse_hint_fallback_to_string() 3613 3614 return self.expression(exp.Hint, expressions=hints) 3615 3616 def _parse_hint(self) -> t.Optional[exp.Hint]: 3617 if self._match(TokenType.HINT) and self._prev_comments: 3618 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3619 3620 return None 3621 3622 def _parse_into(self) -> t.Optional[exp.Into]: 3623 if not self._match(TokenType.INTO): 3624 return None 3625 3626 temp = self._match(TokenType.TEMPORARY) 3627 unlogged = self._match_text_seq("UNLOGGED") 3628 self._match(TokenType.TABLE) 3629 3630 return self.expression( 3631 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3632 ) 3633 3634 def _parse_from( 3635 self, 3636 joins: bool = False, 3637 skip_from_token: bool = False, 3638 consume_pipe: bool = False, 3639 ) -> t.Optional[exp.From]: 3640 if not skip_from_token and not self._match(TokenType.FROM): 3641 return None 3642 3643 return self.expression( 3644 exp.From, 3645 comments=self._prev_comments, 3646 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3647 ) 3648 3649 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3650 return self.expression( 3651 exp.MatchRecognizeMeasure, 3652 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3653 this=self._parse_expression(), 3654 ) 3655 3656 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3657 if not self._match(TokenType.MATCH_RECOGNIZE): 3658 return None 3659 3660 self._match_l_paren() 3661 3662 partition = self._parse_partition_by() 3663 order = self._parse_order() 3664 3665 measures = ( 3666 self._parse_csv(self._parse_match_recognize_measure) 3667 if self._match_text_seq("MEASURES") 3668 else None 3669 ) 3670 3671 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3672 rows = exp.var("ONE ROW PER MATCH") 3673 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3674 text = "ALL ROWS PER MATCH" 3675 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3676 text += " SHOW EMPTY MATCHES" 3677 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3678 text += " OMIT EMPTY MATCHES" 3679 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3680 text += " WITH UNMATCHED ROWS" 3681 rows = exp.var(text) 3682 else: 3683 rows = None 3684 3685 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3686 text = "AFTER 
MATCH SKIP" 3687 if self._match_text_seq("PAST", "LAST", "ROW"): 3688 text += " PAST LAST ROW" 3689 elif self._match_text_seq("TO", "NEXT", "ROW"): 3690 text += " TO NEXT ROW" 3691 elif self._match_text_seq("TO", "FIRST"): 3692 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3693 elif self._match_text_seq("TO", "LAST"): 3694 text += f" TO LAST {self._advance_any().text}" # type: ignore 3695 after = exp.var(text) 3696 else: 3697 after = None 3698 3699 if self._match_text_seq("PATTERN"): 3700 self._match_l_paren() 3701 3702 if not self._curr: 3703 self.raise_error("Expecting )", self._curr) 3704 3705 paren = 1 3706 start = self._curr 3707 3708 while self._curr and paren > 0: 3709 if self._curr.token_type == TokenType.L_PAREN: 3710 paren += 1 3711 if self._curr.token_type == TokenType.R_PAREN: 3712 paren -= 1 3713 3714 end = self._prev 3715 self._advance() 3716 3717 if paren > 0: 3718 self.raise_error("Expecting )", self._curr) 3719 3720 pattern = exp.var(self._find_sql(start, end)) 3721 else: 3722 pattern = None 3723 3724 define = ( 3725 self._parse_csv(self._parse_name_as_expression) 3726 if self._match_text_seq("DEFINE") 3727 else None 3728 ) 3729 3730 self._match_r_paren() 3731 3732 return self.expression( 3733 exp.MatchRecognize, 3734 partition_by=partition, 3735 order=order, 3736 measures=measures, 3737 rows=rows, 3738 after=after, 3739 pattern=pattern, 3740 define=define, 3741 alias=self._parse_table_alias(), 3742 ) 3743 3744 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3745 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3746 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3747 cross_apply = False 3748 3749 if cross_apply is not None: 3750 this = self._parse_select(table=True) 3751 view = None 3752 outer = None 3753 elif self._match(TokenType.LATERAL): 3754 this = self._parse_select(table=True) 3755 view = self._match(TokenType.VIEW) 3756 outer = self._match(TokenType.OUTER) 3757 else: 3758 return None 3759 3760 if not this: 3761 this = ( 3762 self._parse_unnest() 3763 or self._parse_function() 3764 or self._parse_id_var(any_token=False) 3765 ) 3766 3767 while self._match(TokenType.DOT): 3768 this = exp.Dot( 3769 this=this, 3770 expression=self._parse_function() or self._parse_id_var(any_token=False), 3771 ) 3772 3773 ordinality: t.Optional[bool] = None 3774 3775 if view: 3776 table = self._parse_id_var(any_token=False) 3777 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3778 table_alias: t.Optional[exp.TableAlias] = self.expression( 3779 exp.TableAlias, this=table, columns=columns 3780 ) 3781 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3782 # We move the alias from the lateral's child node to the lateral itself 3783 table_alias = this.args["alias"].pop() 3784 else: 3785 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3786 table_alias = self._parse_table_alias() 3787 3788 return self.expression( 3789 exp.Lateral, 3790 this=this, 3791 view=view, 3792 outer=outer, 3793 alias=table_alias, 3794 cross_apply=cross_apply, 3795 ordinality=ordinality, 3796 ) 3797 3798 def _parse_join_parts( 3799 self, 3800 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3801 return ( 3802 self._match_set(self.JOIN_METHODS) and self._prev, 3803 self._match_set(self.JOIN_SIDES) and self._prev, 3804 self._match_set(self.JOIN_KINDS) and self._prev, 3805 ) 3806 3807 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3808 def 
_parse_column_as_identifier() -> t.Optional[exp.Expression]: 3809 this = self._parse_column() 3810 if isinstance(this, exp.Column): 3811 return this.this 3812 return this 3813 3814 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3815 3816 def _parse_join( 3817 self, skip_join_token: bool = False, parse_bracket: bool = False 3818 ) -> t.Optional[exp.Join]: 3819 if self._match(TokenType.COMMA): 3820 table = self._try_parse(self._parse_table) 3821 cross_join = self.expression(exp.Join, this=table) if table else None 3822 3823 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3824 cross_join.set("kind", "CROSS") 3825 3826 return cross_join 3827 3828 index = self._index 3829 method, side, kind = self._parse_join_parts() 3830 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3831 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3832 join_comments = self._prev_comments 3833 3834 if not skip_join_token and not join: 3835 self._retreat(index) 3836 kind = None 3837 method = None 3838 side = None 3839 3840 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3841 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3842 3843 if not skip_join_token and not join and not outer_apply and not cross_apply: 3844 return None 3845 3846 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3847 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3848 kwargs["expressions"] = self._parse_csv( 3849 lambda: self._parse_table(parse_bracket=parse_bracket) 3850 ) 3851 3852 if method: 3853 kwargs["method"] = method.text 3854 if side: 3855 kwargs["side"] = side.text 3856 if kind: 3857 kwargs["kind"] = kind.text 3858 if hint: 3859 kwargs["hint"] = hint 3860 3861 if self._match(TokenType.MATCH_CONDITION): 3862 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3863 3864 if self._match(TokenType.ON): 3865 kwargs["on"] = self._parse_assignment() 3866 elif self._match(TokenType.USING): 3867 kwargs["using"] = self._parse_using_identifiers() 3868 elif ( 3869 not method 3870 and not (outer_apply or cross_apply) 3871 and not isinstance(kwargs["this"], exp.Unnest) 3872 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3873 ): 3874 index = self._index 3875 joins: t.Optional[list] = list(self._parse_joins()) 3876 3877 if joins and self._match(TokenType.ON): 3878 kwargs["on"] = self._parse_assignment() 3879 elif joins and self._match(TokenType.USING): 3880 kwargs["using"] = self._parse_using_identifiers() 3881 else: 3882 joins = None 3883 self._retreat(index) 3884 3885 kwargs["this"].set("joins", joins if joins else None) 3886 3887 kwargs["pivots"] = self._parse_pivots() 3888 3889 comments = [c for token in (method, side, kind) if token for c in token.comments] 3890 comments = (join_comments or []) + comments 3891 3892 if ( 3893 self.ADD_JOIN_ON_TRUE 3894 and not kwargs.get("on") 3895 and not kwargs.get("using") 3896 and not kwargs.get("method") 3897 and kwargs.get("kind") in (None, "INNER", "OUTER") 3898 ): 3899 kwargs["on"] = exp.true() 3900 3901 return self.expression(exp.Join, comments=comments, **kwargs) 3902 3903 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3904 this = self._parse_assignment() 3905 3906 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3907 return this 3908 3909 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3910 return 
self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3911 3912 return this 3913 3914 def _parse_index_params(self) -> exp.IndexParameters: 3915 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3916 3917 if self._match(TokenType.L_PAREN, advance=False): 3918 columns = self._parse_wrapped_csv(self._parse_with_operator) 3919 else: 3920 columns = None 3921 3922 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3923 partition_by = self._parse_partition_by() 3924 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3925 tablespace = ( 3926 self._parse_var(any_token=True) 3927 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3928 else None 3929 ) 3930 where = self._parse_where() 3931 3932 on = self._parse_field() if self._match(TokenType.ON) else None 3933 3934 return self.expression( 3935 exp.IndexParameters, 3936 using=using, 3937 columns=columns, 3938 include=include, 3939 partition_by=partition_by, 3940 where=where, 3941 with_storage=with_storage, 3942 tablespace=tablespace, 3943 on=on, 3944 ) 3945 3946 def _parse_index( 3947 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3948 ) -> t.Optional[exp.Index]: 3949 if index or anonymous: 3950 unique = None 3951 primary = None 3952 amp = None 3953 3954 self._match(TokenType.ON) 3955 self._match(TokenType.TABLE) # hive 3956 table = self._parse_table_parts(schema=True) 3957 else: 3958 unique = self._match(TokenType.UNIQUE) 3959 primary = self._match_text_seq("PRIMARY") 3960 amp = self._match_text_seq("AMP") 3961 3962 if not self._match(TokenType.INDEX): 3963 return None 3964 3965 index = self._parse_id_var() 3966 table = None 3967 3968 params = self._parse_index_params() 3969 3970 return self.expression( 3971 exp.Index, 3972 this=index, 3973 table=table, 3974 unique=unique, 3975 primary=primary, 3976 amp=amp, 3977 params=params, 3978 ) 3979 3980 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3981 hints: t.List[exp.Expression] = [] 3982 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3983 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3984 hints.append( 3985 self.expression( 3986 exp.WithTableHint, 3987 expressions=self._parse_csv( 3988 lambda: self._parse_function() or self._parse_var(any_token=True) 3989 ), 3990 ) 3991 ) 3992 self._match_r_paren() 3993 else: 3994 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3995 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3996 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3997 3998 self._match_set((TokenType.INDEX, TokenType.KEY)) 3999 if self._match(TokenType.FOR): 4000 hint.set("target", self._advance_any() and self._prev.text.upper()) 4001 4002 hint.set("expressions", self._parse_wrapped_id_vars()) 4003 hints.append(hint) 4004 4005 return hints or None 4006 4007 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 4008 return ( 4009 (not schema and self._parse_function(optional_parens=False)) 4010 or self._parse_id_var(any_token=False) 4011 or self._parse_string_as_identifier() 4012 or self._parse_placeholder() 4013 ) 4014 4015 def _parse_table_parts( 4016 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 4017 ) -> exp.Table: 4018 catalog = None 4019 db = None 4020 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 4021 4022 while self._match(TokenType.DOT): 4023 
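            # Illustrative sketch of the dot-handling below: FROM a.b.c produces
            # exp.Table(catalog=a, db=b, this=c); any deeper dotted parts are folded
            # into nested exp.Dot nodes, and the T-SQL form FROM a..b fills the
            # skipped part with an empty string.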
if catalog: 4024 # This allows nesting the table in arbitrarily many dot expressions if needed 4025 table = self.expression( 4026 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4027 ) 4028 else: 4029 catalog = db 4030 db = table 4031 # "" used for tsql FROM a..b case 4032 table = self._parse_table_part(schema=schema) or "" 4033 4034 if ( 4035 wildcard 4036 and self._is_connected() 4037 and (isinstance(table, exp.Identifier) or not table) 4038 and self._match(TokenType.STAR) 4039 ): 4040 if isinstance(table, exp.Identifier): 4041 table.args["this"] += "*" 4042 else: 4043 table = exp.Identifier(this="*") 4044 4045 # We bubble up comments from the Identifier to the Table 4046 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4047 4048 if is_db_reference: 4049 catalog = db 4050 db = table 4051 table = None 4052 4053 if not table and not is_db_reference: 4054 self.raise_error(f"Expected table name but got {self._curr}") 4055 if not db and is_db_reference: 4056 self.raise_error(f"Expected database name but got {self._curr}") 4057 4058 table = self.expression( 4059 exp.Table, 4060 comments=comments, 4061 this=table, 4062 db=db, 4063 catalog=catalog, 4064 ) 4065 4066 changes = self._parse_changes() 4067 if changes: 4068 table.set("changes", changes) 4069 4070 at_before = self._parse_historical_data() 4071 if at_before: 4072 table.set("when", at_before) 4073 4074 pivots = self._parse_pivots() 4075 if pivots: 4076 table.set("pivots", pivots) 4077 4078 return table 4079 4080 def _parse_table( 4081 self, 4082 schema: bool = False, 4083 joins: bool = False, 4084 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4085 parse_bracket: bool = False, 4086 is_db_reference: bool = False, 4087 parse_partition: bool = False, 4088 consume_pipe: bool = False, 4089 ) -> t.Optional[exp.Expression]: 4090 lateral = self._parse_lateral() 4091 if lateral: 4092 return lateral 4093 4094 unnest = self._parse_unnest() 4095 if unnest: 4096 return unnest 4097 4098 values = self._parse_derived_table_values() 4099 if values: 4100 return values 4101 4102 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4103 if subquery: 4104 if not subquery.args.get("pivots"): 4105 subquery.set("pivots", self._parse_pivots()) 4106 return subquery 4107 4108 bracket = parse_bracket and self._parse_bracket(None) 4109 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4110 4111 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4112 self._parse_table 4113 ) 4114 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4115 4116 only = self._match(TokenType.ONLY) 4117 4118 this = t.cast( 4119 exp.Expression, 4120 bracket 4121 or rows_from 4122 or self._parse_bracket( 4123 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4124 ), 4125 ) 4126 4127 if only: 4128 this.set("only", only) 4129 4130 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4131 self._match_text_seq("*") 4132 4133 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4134 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4135 this.set("partition", self._parse_partition()) 4136 4137 if schema: 4138 return self._parse_schema(this=this) 4139 4140 version = self._parse_version() 4141 4142 if version: 4143 this.set("version", version) 4144 4145 if self.dialect.ALIAS_POST_TABLESAMPLE: 4146 this.set("sample", self._parse_table_sample()) 4147 
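        # Usage sketch (public sqlglot API; exact AST shapes can vary by version):
        #
        #   >>> import sqlglot
        #   >>> tbl = sqlglot.parse_one("SELECT * FROM db.tbl AS t").args["from"].this
        #   >>> # tbl is an exp.Table with this=tbl, db=db and an exp.TableAlias "t"
        #
        # For most dialects the alias is parsed below and the table sample after it;
        # dialects with ALIAS_POST_TABLESAMPLE (e.g. Hive) consume the sample just above.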
4148 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4149 if alias: 4150 this.set("alias", alias) 4151 4152 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4153 return self.expression( 4154 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4155 ) 4156 4157 this.set("hints", self._parse_table_hints()) 4158 4159 if not this.args.get("pivots"): 4160 this.set("pivots", self._parse_pivots()) 4161 4162 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4163 this.set("sample", self._parse_table_sample()) 4164 4165 if joins: 4166 for join in self._parse_joins(): 4167 this.append("joins", join) 4168 4169 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4170 this.set("ordinality", True) 4171 this.set("alias", self._parse_table_alias()) 4172 4173 return this 4174 4175 def _parse_version(self) -> t.Optional[exp.Version]: 4176 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4177 this = "TIMESTAMP" 4178 elif self._match(TokenType.VERSION_SNAPSHOT): 4179 this = "VERSION" 4180 else: 4181 return None 4182 4183 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4184 kind = self._prev.text.upper() 4185 start = self._parse_bitwise() 4186 self._match_texts(("TO", "AND")) 4187 end = self._parse_bitwise() 4188 expression: t.Optional[exp.Expression] = self.expression( 4189 exp.Tuple, expressions=[start, end] 4190 ) 4191 elif self._match_text_seq("CONTAINED", "IN"): 4192 kind = "CONTAINED IN" 4193 expression = self.expression( 4194 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4195 ) 4196 elif self._match(TokenType.ALL): 4197 kind = "ALL" 4198 expression = None 4199 else: 4200 self._match_text_seq("AS", "OF") 4201 kind = "AS OF" 4202 expression = self._parse_type() 4203 4204 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4205 4206 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4207 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4208 index = self._index 4209 historical_data = None 4210 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4211 this = self._prev.text.upper() 4212 kind = ( 4213 self._match(TokenType.L_PAREN) 4214 and self._match_texts(self.HISTORICAL_DATA_KIND) 4215 and self._prev.text.upper() 4216 ) 4217 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4218 4219 if expression: 4220 self._match_r_paren() 4221 historical_data = self.expression( 4222 exp.HistoricalData, this=this, kind=kind, expression=expression 4223 ) 4224 else: 4225 self._retreat(index) 4226 4227 return historical_data 4228 4229 def _parse_changes(self) -> t.Optional[exp.Changes]: 4230 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4231 return None 4232 4233 information = self._parse_var(any_token=True) 4234 self._match_r_paren() 4235 4236 return self.expression( 4237 exp.Changes, 4238 information=information, 4239 at_before=self._parse_historical_data(), 4240 end=self._parse_historical_data(), 4241 ) 4242 4243 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4244 if not self._match_pair(TokenType.UNNEST, TokenType.L_PAREN, advance=False): 4245 return None 4246 4247 self._advance() 4248 4249 expressions = self._parse_wrapped_csv(self._parse_equality) 4250 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4251 4252 alias = self._parse_table_alias() if with_alias else None 4253 4254 if alias: 4255 if self.dialect.UNNEST_COLUMN_ONLY: 4256 if alias.args.get("columns"): 4257 
self.raise_error("Unexpected extra column alias in unnest.") 4258 4259 alias.set("columns", [alias.this]) 4260 alias.set("this", None) 4261 4262 columns = alias.args.get("columns") or [] 4263 if offset and len(expressions) < len(columns): 4264 offset = columns.pop() 4265 4266 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4267 self._match(TokenType.ALIAS) 4268 offset = self._parse_id_var( 4269 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4270 ) or exp.to_identifier("offset") 4271 4272 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4273 4274 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4275 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4276 if not is_derived and not ( 4277 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4278 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4279 ): 4280 return None 4281 4282 expressions = self._parse_csv(self._parse_value) 4283 alias = self._parse_table_alias() 4284 4285 if is_derived: 4286 self._match_r_paren() 4287 4288 return self.expression( 4289 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4290 ) 4291 4292 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4293 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4294 as_modifier and self._match_text_seq("USING", "SAMPLE") 4295 ): 4296 return None 4297 4298 bucket_numerator = None 4299 bucket_denominator = None 4300 bucket_field = None 4301 percent = None 4302 size = None 4303 seed = None 4304 4305 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4306 matched_l_paren = self._match(TokenType.L_PAREN) 4307 4308 if self.TABLESAMPLE_CSV: 4309 num = None 4310 expressions = self._parse_csv(self._parse_primary) 4311 else: 4312 expressions = None 4313 num = ( 4314 self._parse_factor() 4315 if self._match(TokenType.NUMBER, advance=False) 4316 else self._parse_primary() or self._parse_placeholder() 4317 ) 4318 4319 if self._match_text_seq("BUCKET"): 4320 bucket_numerator = self._parse_number() 4321 self._match_text_seq("OUT", "OF") 4322 bucket_denominator = bucket_denominator = self._parse_number() 4323 self._match(TokenType.ON) 4324 bucket_field = self._parse_field() 4325 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4326 percent = num 4327 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4328 size = num 4329 else: 4330 percent = num 4331 4332 if matched_l_paren: 4333 self._match_r_paren() 4334 4335 if self._match(TokenType.L_PAREN): 4336 method = self._parse_var(upper=True) 4337 seed = self._match(TokenType.COMMA) and self._parse_number() 4338 self._match_r_paren() 4339 elif self._match_texts(("SEED", "REPEATABLE")): 4340 seed = self._parse_wrapped(self._parse_number) 4341 4342 if not method and self.DEFAULT_SAMPLING_METHOD: 4343 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4344 4345 return self.expression( 4346 exp.TableSample, 4347 expressions=expressions, 4348 method=method, 4349 bucket_numerator=bucket_numerator, 4350 bucket_denominator=bucket_denominator, 4351 bucket_field=bucket_field, 4352 percent=percent, 4353 size=size, 4354 seed=seed, 4355 ) 4356 4357 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4358 return list(iter(self._parse_pivot, None)) or None 4359 4360 def _parse_joins(self) -> t.Iterator[exp.Join]: 4361 return iter(self._parse_join, None) 4362 4363 def _parse_unpivot_columns(self) -> 
t.Optional[exp.UnpivotColumns]: 4364 if not self._match(TokenType.INTO): 4365 return None 4366 4367 return self.expression( 4368 exp.UnpivotColumns, 4369 this=self._match_text_seq("NAME") and self._parse_column(), 4370 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4371 ) 4372 4373 # https://duckdb.org/docs/sql/statements/pivot 4374 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4375 def _parse_on() -> t.Optional[exp.Expression]: 4376 this = self._parse_bitwise() 4377 4378 if self._match(TokenType.IN): 4379 # PIVOT ... ON col IN (row_val1, row_val2) 4380 return self._parse_in(this) 4381 if self._match(TokenType.ALIAS, advance=False): 4382 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4383 return self._parse_alias(this) 4384 4385 return this 4386 4387 this = self._parse_table() 4388 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4389 into = self._parse_unpivot_columns() 4390 using = self._match(TokenType.USING) and self._parse_csv( 4391 lambda: self._parse_alias(self._parse_function()) 4392 ) 4393 group = self._parse_group() 4394 4395 return self.expression( 4396 exp.Pivot, 4397 this=this, 4398 expressions=expressions, 4399 using=using, 4400 group=group, 4401 unpivot=is_unpivot, 4402 into=into, 4403 ) 4404 4405 def _parse_pivot_in(self) -> exp.In: 4406 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4407 this = self._parse_select_or_expression() 4408 4409 self._match(TokenType.ALIAS) 4410 alias = self._parse_bitwise() 4411 if alias: 4412 if isinstance(alias, exp.Column) and not alias.db: 4413 alias = alias.this 4414 return self.expression(exp.PivotAlias, this=this, alias=alias) 4415 4416 return this 4417 4418 value = self._parse_column() 4419 4420 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4421 self.raise_error("Expecting IN (") 4422 4423 if self._match(TokenType.ANY): 4424 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4425 else: 4426 exprs = self._parse_csv(_parse_aliased_expression) 4427 4428 self._match_r_paren() 4429 return self.expression(exp.In, this=value, expressions=exprs) 4430 4431 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4432 func = self._parse_function() 4433 if not func: 4434 if self._prev and self._prev.token_type == TokenType.COMMA: 4435 return None 4436 self.raise_error("Expecting an aggregation function in PIVOT") 4437 4438 return self._parse_alias(func) 4439 4440 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4441 index = self._index 4442 include_nulls = None 4443 4444 if self._match(TokenType.PIVOT): 4445 unpivot = False 4446 elif self._match(TokenType.UNPIVOT): 4447 unpivot = True 4448 4449 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4450 if self._match_text_seq("INCLUDE", "NULLS"): 4451 include_nulls = True 4452 elif self._match_text_seq("EXCLUDE", "NULLS"): 4453 include_nulls = False 4454 else: 4455 return None 4456 4457 expressions = [] 4458 4459 if not self._match(TokenType.L_PAREN): 4460 self._retreat(index) 4461 return None 4462 4463 if unpivot: 4464 expressions = self._parse_csv(self._parse_column) 4465 else: 4466 expressions = self._parse_csv(self._parse_pivot_aggregation) 4467 4468 if not expressions: 4469 self.raise_error("Failed to parse PIVOT's aggregation list") 4470 4471 if not self._match(TokenType.FOR): 4472 self.raise_error("Expecting FOR") 4473 4474 fields = [] 4475 while True: 4476 field = 
self._try_parse(self._parse_pivot_in) 4477 if not field: 4478 break 4479 fields.append(field) 4480 4481 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4482 self._parse_bitwise 4483 ) 4484 4485 group = self._parse_group() 4486 4487 self._match_r_paren() 4488 4489 pivot = self.expression( 4490 exp.Pivot, 4491 expressions=expressions, 4492 fields=fields, 4493 unpivot=unpivot, 4494 include_nulls=include_nulls, 4495 default_on_null=default_on_null, 4496 group=group, 4497 ) 4498 4499 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4500 pivot.set("alias", self._parse_table_alias()) 4501 4502 if not unpivot: 4503 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4504 4505 columns: t.List[exp.Expression] = [] 4506 all_fields = [] 4507 for pivot_field in pivot.fields: 4508 pivot_field_expressions = pivot_field.expressions 4509 4510 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4511 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4512 continue 4513 4514 all_fields.append( 4515 [ 4516 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4517 for fld in pivot_field_expressions 4518 ] 4519 ) 4520 4521 if all_fields: 4522 if names: 4523 all_fields.append(names) 4524 4525 # Generate all possible combinations of the pivot columns 4526 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4527 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4528 for fld_parts_tuple in itertools.product(*all_fields): 4529 fld_parts = list(fld_parts_tuple) 4530 4531 if names and self.PREFIXED_PIVOT_COLUMNS: 4532 # Move the "name" to the front of the list 4533 fld_parts.insert(0, fld_parts.pop(-1)) 4534 4535 columns.append(exp.to_identifier("_".join(fld_parts))) 4536 4537 pivot.set("columns", columns) 4538 4539 return pivot 4540 4541 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4542 return [agg.alias for agg in aggregations if agg.alias] 4543 4544 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4545 if not skip_where_token and not self._match(TokenType.PREWHERE): 4546 return None 4547 4548 return self.expression( 4549 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4550 ) 4551 4552 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4553 if not skip_where_token and not self._match(TokenType.WHERE): 4554 return None 4555 4556 return self.expression( 4557 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4558 ) 4559 4560 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4561 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4562 return None 4563 comments = self._prev_comments 4564 4565 elements: t.Dict[str, t.Any] = defaultdict(list) 4566 4567 if self._match(TokenType.ALL): 4568 elements["all"] = True 4569 elif self._match(TokenType.DISTINCT): 4570 elements["all"] = False 4571 4572 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4573 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4574 4575 while True: 4576 index = self._index 4577 4578 elements["expressions"].extend( 4579 self._parse_csv( 4580 lambda: None 4581 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4582 else self._parse_assignment() 4583 ) 4584 ) 4585 4586 
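            # The branches below cover the remaining GROUP BY forms (illustrative, e.g.):
            #   GROUP BY a, ROLLUP (b, c)          -> elements["rollup"]
            #   GROUP BY a WITH ROLLUP             -> MySQL-style suffix, empty ROLLUP args
            #   GROUP BY GROUPING SETS ((a), (b))  -> elements["grouping_sets"]
            #   GROUP BY a WITH TOTALS             -> ClickHouse, elements["totals"]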
before_with_index = self._index 4587 with_prefix = self._match(TokenType.WITH) 4588 4589 cube_or_rollup = self._parse_cube_or_rollup(with_prefix=with_prefix) 4590 if cube_or_rollup: 4591 key = "rollup" if isinstance(cube_or_rollup, exp.Rollup) else "cube" 4592 elements[key].append(cube_or_rollup) 4593 elif self._match(TokenType.GROUPING_SETS): 4594 elements["grouping_sets"].append( 4595 self.expression( 4596 exp.GroupingSets, 4597 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4598 ) 4599 ) 4600 elif self._match_text_seq("TOTALS"): 4601 elements["totals"] = True # type: ignore 4602 4603 if before_with_index <= self._index <= before_with_index + 1: 4604 self._retreat(before_with_index) 4605 break 4606 4607 if index == self._index: 4608 break 4609 4610 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4611 4612 def _parse_cube_or_rollup(self, with_prefix: bool = False) -> t.Optional[exp.Cube | exp.Rollup]: 4613 if self._match(TokenType.CUBE): 4614 kind: t.Type[exp.Cube | exp.Rollup] = exp.Cube 4615 elif self._match(TokenType.ROLLUP): 4616 kind = exp.Rollup 4617 else: 4618 return None 4619 4620 return self.expression( 4621 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4622 ) 4623 4624 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4625 return self._parse_cube_or_rollup() or self._parse_bitwise() 4626 4627 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4628 if not skip_having_token and not self._match(TokenType.HAVING): 4629 return None 4630 return self.expression( 4631 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4632 ) 4633 4634 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4635 if not self._match(TokenType.QUALIFY): 4636 return None 4637 return self.expression(exp.Qualify, this=self._parse_assignment()) 4638 4639 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4640 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4641 exp.Prior, this=self._parse_bitwise() 4642 ) 4643 connect = self._parse_assignment() 4644 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4645 return connect 4646 4647 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4648 if skip_start_token: 4649 start = None 4650 elif self._match(TokenType.START_WITH): 4651 start = self._parse_assignment() 4652 else: 4653 return None 4654 4655 self._match(TokenType.CONNECT_BY) 4656 nocycle = self._match_text_seq("NOCYCLE") 4657 connect = self._parse_connect_with_prior() 4658 4659 if not start and self._match(TokenType.START_WITH): 4660 start = self._parse_assignment() 4661 4662 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4663 4664 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4665 this = self._parse_id_var(any_token=True) 4666 if self._match(TokenType.ALIAS): 4667 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4668 return this 4669 4670 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4671 if self._match_text_seq("INTERPOLATE"): 4672 return self._parse_wrapped_csv(self._parse_name_as_expression) 4673 return None 4674 4675 def _parse_order( 4676 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4677 ) -> t.Optional[exp.Expression]: 4678 siblings = None 4679 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4680 if not 
self._match(TokenType.ORDER_SIBLINGS_BY): 4681 return this 4682 4683 siblings = True 4684 4685 return self.expression( 4686 exp.Order, 4687 comments=self._prev_comments, 4688 this=this, 4689 expressions=self._parse_csv(self._parse_ordered), 4690 siblings=siblings, 4691 ) 4692 4693 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4694 if not self._match(token): 4695 return None 4696 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4697 4698 def _parse_ordered( 4699 self, parse_method: t.Optional[t.Callable] = None 4700 ) -> t.Optional[exp.Ordered]: 4701 this = parse_method() if parse_method else self._parse_assignment() 4702 if not this: 4703 return None 4704 4705 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4706 this = exp.var("ALL") 4707 4708 asc = self._match(TokenType.ASC) 4709 desc = self._match(TokenType.DESC) or (asc and False) 4710 4711 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4712 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4713 4714 nulls_first = is_nulls_first or False 4715 explicitly_null_ordered = is_nulls_first or is_nulls_last 4716 4717 if ( 4718 not explicitly_null_ordered 4719 and ( 4720 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4721 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4722 ) 4723 and self.dialect.NULL_ORDERING != "nulls_are_last" 4724 ): 4725 nulls_first = True 4726 4727 if self._match_text_seq("WITH", "FILL"): 4728 with_fill = self.expression( 4729 exp.WithFill, 4730 **{ # type: ignore 4731 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4732 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4733 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4734 "interpolate": self._parse_interpolate(), 4735 }, 4736 ) 4737 else: 4738 with_fill = None 4739 4740 return self.expression( 4741 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4742 ) 4743 4744 def _parse_limit_options(self) -> t.Optional[exp.LimitOptions]: 4745 percent = self._match_set((TokenType.PERCENT, TokenType.MOD)) 4746 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4747 self._match_text_seq("ONLY") 4748 with_ties = self._match_text_seq("WITH", "TIES") 4749 4750 if not (percent or rows or with_ties): 4751 return None 4752 4753 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4754 4755 def _parse_limit( 4756 self, 4757 this: t.Optional[exp.Expression] = None, 4758 top: bool = False, 4759 skip_limit_token: bool = False, 4760 ) -> t.Optional[exp.Expression]: 4761 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4762 comments = self._prev_comments 4763 if top: 4764 limit_paren = self._match(TokenType.L_PAREN) 4765 expression = self._parse_term() if limit_paren else self._parse_number() 4766 4767 if limit_paren: 4768 self._match_r_paren() 4769 4770 else: 4771 # Parsing LIMIT x% (i.e x PERCENT) as a term leads to an error, since 4772 # we try to build an exp.Mod expr. 
For that matter, we backtrack and instead 4773 # consume the factor plus parse the percentage separately 4774 expression = self._try_parse(self._parse_term) or self._parse_factor() 4775 4776 limit_options = self._parse_limit_options() 4777 4778 if self._match(TokenType.COMMA): 4779 offset = expression 4780 expression = self._parse_term() 4781 else: 4782 offset = None 4783 4784 limit_exp = self.expression( 4785 exp.Limit, 4786 this=this, 4787 expression=expression, 4788 offset=offset, 4789 comments=comments, 4790 limit_options=limit_options, 4791 expressions=self._parse_limit_by(), 4792 ) 4793 4794 return limit_exp 4795 4796 if self._match(TokenType.FETCH): 4797 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4798 direction = self._prev.text.upper() if direction else "FIRST" 4799 4800 count = self._parse_field(tokens=self.FETCH_TOKENS) 4801 4802 return self.expression( 4803 exp.Fetch, 4804 direction=direction, 4805 count=count, 4806 limit_options=self._parse_limit_options(), 4807 ) 4808 4809 return this 4810 4811 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4812 if not self._match(TokenType.OFFSET): 4813 return this 4814 4815 count = self._parse_term() 4816 self._match_set((TokenType.ROW, TokenType.ROWS)) 4817 4818 return self.expression( 4819 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4820 ) 4821 4822 def _can_parse_limit_or_offset(self) -> bool: 4823 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4824 return False 4825 4826 index = self._index 4827 result = bool( 4828 self._try_parse(self._parse_limit, retreat=True) 4829 or self._try_parse(self._parse_offset, retreat=True) 4830 ) 4831 self._retreat(index) 4832 return result 4833 4834 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4835 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4836 4837 def _parse_locks(self) -> t.List[exp.Lock]: 4838 locks = [] 4839 while True: 4840 update, key = None, None 4841 if self._match_text_seq("FOR", "UPDATE"): 4842 update = True 4843 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4844 "LOCK", "IN", "SHARE", "MODE" 4845 ): 4846 update = False 4847 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4848 update, key = False, True 4849 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4850 update, key = True, True 4851 else: 4852 break 4853 4854 expressions = None 4855 if self._match_text_seq("OF"): 4856 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4857 4858 wait: t.Optional[bool | exp.Expression] = None 4859 if self._match_text_seq("NOWAIT"): 4860 wait = True 4861 elif self._match_text_seq("WAIT"): 4862 wait = self._parse_primary() 4863 elif self._match_text_seq("SKIP", "LOCKED"): 4864 wait = False 4865 4866 locks.append( 4867 self.expression( 4868 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4869 ) 4870 ) 4871 4872 return locks 4873 4874 def parse_set_operation( 4875 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4876 ) -> t.Optional[exp.Expression]: 4877 start = self._index 4878 _, side_token, kind_token = self._parse_join_parts() 4879 4880 side = side_token.text if side_token else None 4881 kind = kind_token.text if kind_token else None 4882 4883 if not self._match_set(self.SET_OPERATIONS): 4884 self._retreat(start) 4885 return None 4886 4887 token_type = self._prev.token_type 4888 4889 if token_type == TokenType.UNION: 4890 operation: 
t.Type[exp.SetOperation] = exp.Union 4891 elif token_type == TokenType.EXCEPT: 4892 operation = exp.Except 4893 else: 4894 operation = exp.Intersect 4895 4896 comments = self._prev.comments 4897 4898 if self._match(TokenType.DISTINCT): 4899 distinct: t.Optional[bool] = True 4900 elif self._match(TokenType.ALL): 4901 distinct = False 4902 else: 4903 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4904 if distinct is None: 4905 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4906 4907 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4908 "STRICT", "CORRESPONDING" 4909 ) 4910 if self._match_text_seq("CORRESPONDING"): 4911 by_name = True 4912 if not side and not kind: 4913 kind = "INNER" 4914 4915 on_column_list = None 4916 if by_name and self._match_texts(("ON", "BY")): 4917 on_column_list = self._parse_wrapped_csv(self._parse_column) 4918 4919 expression = self._parse_select( 4920 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4921 ) 4922 4923 return self.expression( 4924 operation, 4925 comments=comments, 4926 this=this, 4927 distinct=distinct, 4928 by_name=by_name, 4929 expression=expression, 4930 side=side, 4931 kind=kind, 4932 on=on_column_list, 4933 ) 4934 4935 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4936 while this: 4937 setop = self.parse_set_operation(this) 4938 if not setop: 4939 break 4940 this = setop 4941 4942 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4943 expression = this.expression 4944 4945 if expression: 4946 for arg in self.SET_OP_MODIFIERS: 4947 expr = expression.args.get(arg) 4948 if expr: 4949 this.set(arg, expr.pop()) 4950 4951 return this 4952 4953 def _parse_expression(self) -> t.Optional[exp.Expression]: 4954 return self._parse_alias(self._parse_assignment()) 4955 4956 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4957 this = self._parse_disjunction() 4958 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4959 # This allows us to parse <non-identifier token> := <expr> 4960 this = exp.column( 4961 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4962 ) 4963 4964 while self._match_set(self.ASSIGNMENT): 4965 if isinstance(this, exp.Column) and len(this.parts) == 1: 4966 this = this.this 4967 4968 this = self.expression( 4969 self.ASSIGNMENT[self._prev.token_type], 4970 this=this, 4971 comments=self._prev_comments, 4972 expression=self._parse_assignment(), 4973 ) 4974 4975 return this 4976 4977 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4978 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4979 4980 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4981 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4982 4983 def _parse_equality(self) -> t.Optional[exp.Expression]: 4984 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4985 4986 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4987 return self._parse_tokens(self._parse_range, self.COMPARISON) 4988 4989 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4990 this = this or self._parse_bitwise() 4991 negate = self._match(TokenType.NOT) 4992 4993 if self._match_set(self.RANGE_PARSERS): 4994 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4995 if not expression: 4996 return this 4997 4998 this = expression 4999 elif self._match(TokenType.ISNULL): 
5000 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5001 5002 # Postgres supports ISNULL and NOTNULL for conditions. 5003 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5004 if self._match(TokenType.NOTNULL): 5005 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5006 this = self.expression(exp.Not, this=this) 5007 5008 if negate: 5009 this = self._negate_range(this) 5010 5011 if self._match(TokenType.IS): 5012 this = self._parse_is(this) 5013 5014 return this 5015 5016 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5017 if not this: 5018 return this 5019 5020 return self.expression(exp.Not, this=this) 5021 5022 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5023 index = self._index - 1 5024 negate = self._match(TokenType.NOT) 5025 5026 if self._match_text_seq("DISTINCT", "FROM"): 5027 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5028 return self.expression(klass, this=this, expression=self._parse_bitwise()) 5029 5030 if self._match(TokenType.JSON): 5031 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5032 5033 if self._match_text_seq("WITH"): 5034 _with = True 5035 elif self._match_text_seq("WITHOUT"): 5036 _with = False 5037 else: 5038 _with = None 5039 5040 unique = self._match(TokenType.UNIQUE) 5041 self._match_text_seq("KEYS") 5042 expression: t.Optional[exp.Expression] = self.expression( 5043 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5044 ) 5045 else: 5046 expression = self._parse_primary() or self._parse_null() 5047 if not expression: 5048 self._retreat(index) 5049 return None 5050 5051 this = self.expression(exp.Is, this=this, expression=expression) 5052 return self.expression(exp.Not, this=this) if negate else this 5053 5054 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5055 unnest = self._parse_unnest(with_alias=False) 5056 if unnest: 5057 this = self.expression(exp.In, this=this, unnest=unnest) 5058 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5059 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5060 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5061 5062 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5063 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5064 else: 5065 this = self.expression(exp.In, this=this, expressions=expressions) 5066 5067 if matched_l_paren: 5068 self._match_r_paren(this) 5069 elif not self._match(TokenType.R_BRACKET, expression=this): 5070 self.raise_error("Expecting ]") 5071 else: 5072 this = self.expression(exp.In, this=this, field=self._parse_column()) 5073 5074 return this 5075 5076 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5077 symmetric = None 5078 if self._match_text_seq("SYMMETRIC"): 5079 symmetric = True 5080 elif self._match_text_seq("ASYMMETRIC"): 5081 symmetric = False 5082 5083 low = self._parse_bitwise() 5084 self._match(TokenType.AND) 5085 high = self._parse_bitwise() 5086 5087 return self.expression( 5088 exp.Between, 5089 this=this, 5090 low=low, 5091 high=high, 5092 symmetric=symmetric, 5093 ) 5094 5095 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5096 if not self._match(TokenType.ESCAPE): 5097 return this 5098 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5099 5100 def 
_parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5101 index = self._index 5102 5103 if not self._match(TokenType.INTERVAL) and match_interval: 5104 return None 5105 5106 if self._match(TokenType.STRING, advance=False): 5107 this = self._parse_primary() 5108 else: 5109 this = self._parse_term() 5110 5111 if not this or ( 5112 isinstance(this, exp.Column) 5113 and not this.table 5114 and not this.this.quoted 5115 and self._curr 5116 and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS 5117 ): 5118 self._retreat(index) 5119 return None 5120 5121 # handle day-time format interval span with omitted units: 5122 # INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`> 5123 interval_span_units_omitted = None 5124 if ( 5125 this 5126 and this.is_string 5127 and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT 5128 and exp.INTERVAL_DAY_TIME_RE.match(this.name) 5129 ): 5130 index = self._index 5131 5132 # Var "TO" Var 5133 first_unit = self._parse_var(any_token=True, upper=True) 5134 second_unit = None 5135 if first_unit and self._match_text_seq("TO"): 5136 second_unit = self._parse_var(any_token=True, upper=True) 5137 5138 interval_span_units_omitted = not (first_unit and second_unit) 5139 5140 self._retreat(index) 5141 5142 unit = ( 5143 None 5144 if interval_span_units_omitted 5145 else ( 5146 self._parse_function() 5147 or ( 5148 not self._match(TokenType.ALIAS, advance=False) 5149 and self._parse_var(any_token=True, upper=True) 5150 ) 5151 ) 5152 ) 5153 5154 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5155 # each INTERVAL expression into this canonical form so it's easy to transpile 5156 if this and this.is_number: 5157 this = exp.Literal.string(this.to_py()) 5158 elif this and this.is_string: 5159 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5160 if parts and unit: 5161 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5162 unit = None 5163 self._retreat(self._index - 1) 5164 5165 if len(parts) == 1: 5166 this = exp.Literal.string(parts[0][0]) 5167 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5168 5169 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5170 unit = self.expression( 5171 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5172 ) 5173 5174 interval = self.expression(exp.Interval, this=this, unit=unit) 5175 5176 index = self._index 5177 self._match(TokenType.PLUS) 5178 5179 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 5180 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5181 return self.expression( 5182 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5183 ) 5184 5185 self._retreat(index) 5186 return interval 5187 5188 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5189 this = self._parse_term() 5190 5191 while True: 5192 if self._match_set(self.BITWISE): 5193 this = self.expression( 5194 self.BITWISE[self._prev.token_type], 5195 this=this, 5196 expression=self._parse_term(), 5197 ) 5198 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5199 this = self.expression( 5200 exp.DPipe, 5201 this=this, 5202 expression=self._parse_term(), 5203 safe=not self.dialect.STRICT_STRING_CONCAT, 5204 ) 5205 elif self._match(TokenType.DQMARK): 5206 this = self.expression( 5207 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5208 ) 5209 elif self._match_pair(TokenType.LT, TokenType.LT): 5210 this = self.expression( 5211 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5212 ) 5213 elif self._match_pair(TokenType.GT, TokenType.GT): 5214 this = self.expression( 5215 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5216 ) 5217 else: 5218 break 5219 5220 return this 5221 5222 def _parse_term(self) -> t.Optional[exp.Expression]: 5223 this = self._parse_factor() 5224 5225 while self._match_set(self.TERM): 5226 klass = self.TERM[self._prev.token_type] 5227 comments = self._prev_comments 5228 expression = self._parse_factor() 5229 5230 this = self.expression(klass, this=this, comments=comments, expression=expression) 5231 5232 if isinstance(this, exp.Collate): 5233 expr = this.expression 5234 5235 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5236 # fallback to Identifier / Var 5237 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5238 ident = expr.this 5239 if isinstance(ident, exp.Identifier): 5240 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5241 5242 return this 5243 5244 def _parse_factor(self) -> t.Optional[exp.Expression]: 5245 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5246 this = parse_method() 5247 5248 while self._match_set(self.FACTOR): 5249 klass = self.FACTOR[self._prev.token_type] 5250 comments = self._prev_comments 5251 expression = parse_method() 5252 5253 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5254 self._retreat(self._index - 1) 5255 return this 5256 5257 this = self.expression(klass, this=this, comments=comments, expression=expression) 5258 5259 if isinstance(this, exp.Div): 5260 this.args["typed"] = self.dialect.TYPED_DIVISION 5261 this.args["safe"] = self.dialect.SAFE_DIVISION 5262 5263 return this 5264 5265 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5266 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5267 5268 def _parse_unary(self) -> t.Optional[exp.Expression]: 5269 if self._match_set(self.UNARY_PARSERS): 5270 return self.UNARY_PARSERS[self._prev.token_type](self) 5271 return self._parse_at_time_zone(self._parse_type()) 5272 5273 def _parse_type( 5274 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5275 ) -> t.Optional[exp.Expression]: 5276 interval = parse_interval and self._parse_interval() 5277 if interval: 5278 return interval 5279 5280 index = self._index 5281 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5282 
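# A hedged, illustrative sketch of the inline-constructor canonicalization that the
# comment just below describes (assumes only the public sqlglot API; not part of the
# parser itself, and the exact AST shape may vary by version):
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one("SELECT STRUCT<a INT64>(1)", read="bigquery")
#     # The typed constructor STRUCT<a INT64>(1) is expected to come back as a Cast
#     # wrapping STRUCT(1), i.e. roughly CAST(STRUCT(1) AS STRUCT<a INT64>).
#     assert ast.find(exp.Cast) is not None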
5283 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5284 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5285 if isinstance(data_type, exp.Cast): 5286 # This constructor can contain ops directly after it, for instance struct unnesting: 5287 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5288 return self._parse_column_ops(data_type) 5289 5290 if data_type: 5291 index2 = self._index 5292 this = self._parse_primary() 5293 5294 if isinstance(this, exp.Literal): 5295 literal = this.name 5296 this = self._parse_column_ops(this) 5297 5298 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5299 if parser: 5300 return parser(self, this, data_type) 5301 5302 if ( 5303 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5304 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5305 and TIME_ZONE_RE.search(literal) 5306 ): 5307 data_type = exp.DataType.build("TIMESTAMPTZ") 5308 5309 return self.expression(exp.Cast, this=this, to=data_type) 5310 5311 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5312 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5313 # 5314 # If the index difference here is greater than 1, that means the parser itself must have 5315 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5316 # 5317 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5318 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5319 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5320 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5321 # 5322 # In these cases, we don't really want to return the converted type, but instead retreat 5323 # and try to parse a Column or Identifier in the section below. 
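# A minimal sketch of the retreat described above, assuming the Snowflake converter
# behaviour mentioned in the comment (illustrative only; the column name below is a
# hypothetical example, not taken from this module):
#
#     import sqlglot
#     from sqlglot import exp
#
#     # "number" tokenizes as a decimal type keyword in Snowflake, but no precision or
#     # scale tokens follow, so the expressions arg could only have been set by a
#     # converter; the parser is expected to retreat and yield a plain column reference.
#     ast = sqlglot.parse_one("SELECT number FROM t", read="snowflake")
#     assert isinstance(ast.selects[0], exp.Column)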
5324 if data_type.expressions and index2 - index > 1: 5325 self._retreat(index2) 5326 return self._parse_column_ops(data_type) 5327 5328 self._retreat(index) 5329 5330 if fallback_to_identifier: 5331 return self._parse_id_var() 5332 5333 this = self._parse_column() 5334 return this and self._parse_column_ops(this) 5335 5336 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5337 this = self._parse_type() 5338 if not this: 5339 return None 5340 5341 if isinstance(this, exp.Column) and not this.table: 5342 this = exp.var(this.name.upper()) 5343 5344 return self.expression( 5345 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5346 ) 5347 5348 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5349 type_name = identifier.name 5350 5351 while self._match(TokenType.DOT): 5352 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5353 5354 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5355 5356 def _parse_types( 5357 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5358 ) -> t.Optional[exp.Expression]: 5359 index = self._index 5360 5361 this: t.Optional[exp.Expression] = None 5362 prefix = self._match_text_seq("SYSUDTLIB", ".") 5363 5364 if self._match_set(self.TYPE_TOKENS): 5365 type_token = self._prev.token_type 5366 else: 5367 type_token = None 5368 identifier = allow_identifiers and self._parse_id_var( 5369 any_token=False, tokens=(TokenType.VAR,) 5370 ) 5371 if isinstance(identifier, exp.Identifier): 5372 try: 5373 tokens = self.dialect.tokenize(identifier.name) 5374 except TokenError: 5375 tokens = None 5376 5377 if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: 5378 type_token = tokens[0].token_type 5379 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5380 this = self._parse_user_defined_type(identifier) 5381 else: 5382 self._retreat(self._index - 1) 5383 return None 5384 else: 5385 return None 5386 5387 if type_token == TokenType.PSEUDO_TYPE: 5388 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5389 5390 if type_token == TokenType.OBJECT_IDENTIFIER: 5391 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5392 5393 # https://materialize.com/docs/sql/types/map/ 5394 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5395 key_type = self._parse_types( 5396 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5397 ) 5398 if not self._match(TokenType.FARROW): 5399 self._retreat(index) 5400 return None 5401 5402 value_type = self._parse_types( 5403 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5404 ) 5405 if not self._match(TokenType.R_BRACKET): 5406 self._retreat(index) 5407 return None 5408 5409 return exp.DataType( 5410 this=exp.DataType.Type.MAP, 5411 expressions=[key_type, value_type], 5412 nested=True, 5413 prefix=prefix, 5414 ) 5415 5416 nested = type_token in self.NESTED_TYPE_TOKENS 5417 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5418 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5419 expressions = None 5420 maybe_func = False 5421 5422 if self._match(TokenType.L_PAREN): 5423 if is_struct: 5424 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5425 elif nested: 5426 expressions = self._parse_csv( 5427 lambda: self._parse_types( 5428 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5429 ) 5430 ) 5431 if type_token == 
TokenType.NULLABLE and len(expressions) == 1: 5432 this = expressions[0] 5433 this.set("nullable", True) 5434 self._match_r_paren() 5435 return this 5436 elif type_token in self.ENUM_TYPE_TOKENS: 5437 expressions = self._parse_csv(self._parse_equality) 5438 elif is_aggregate: 5439 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5440 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5441 ) 5442 if not func_or_ident: 5443 return None 5444 expressions = [func_or_ident] 5445 if self._match(TokenType.COMMA): 5446 expressions.extend( 5447 self._parse_csv( 5448 lambda: self._parse_types( 5449 check_func=check_func, 5450 schema=schema, 5451 allow_identifiers=allow_identifiers, 5452 ) 5453 ) 5454 ) 5455 else: 5456 expressions = self._parse_csv(self._parse_type_size) 5457 5458 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5459 if type_token == TokenType.VECTOR and len(expressions) == 2: 5460 expressions = self._parse_vector_expressions(expressions) 5461 5462 if not self._match(TokenType.R_PAREN): 5463 self._retreat(index) 5464 return None 5465 5466 maybe_func = True 5467 5468 values: t.Optional[t.List[exp.Expression]] = None 5469 5470 if nested and self._match(TokenType.LT): 5471 if is_struct: 5472 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5473 else: 5474 expressions = self._parse_csv( 5475 lambda: self._parse_types( 5476 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5477 ) 5478 ) 5479 5480 if not self._match(TokenType.GT): 5481 self.raise_error("Expecting >") 5482 5483 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5484 values = self._parse_csv(self._parse_assignment) 5485 if not values and is_struct: 5486 values = None 5487 self._retreat(self._index - 1) 5488 else: 5489 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5490 5491 if type_token in self.TIMESTAMPS: 5492 if self._match_text_seq("WITH", "TIME", "ZONE"): 5493 maybe_func = False 5494 tz_type = ( 5495 exp.DataType.Type.TIMETZ 5496 if type_token in self.TIMES 5497 else exp.DataType.Type.TIMESTAMPTZ 5498 ) 5499 this = exp.DataType(this=tz_type, expressions=expressions) 5500 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5501 maybe_func = False 5502 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5503 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5504 maybe_func = False 5505 elif type_token == TokenType.INTERVAL: 5506 unit = self._parse_var(upper=True) 5507 if unit: 5508 if self._match_text_seq("TO"): 5509 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5510 5511 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5512 else: 5513 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5514 elif type_token == TokenType.VOID: 5515 this = exp.DataType(this=exp.DataType.Type.NULL) 5516 5517 if maybe_func and check_func: 5518 index2 = self._index 5519 peek = self._parse_string() 5520 5521 if not peek: 5522 self._retreat(index) 5523 return None 5524 5525 self._retreat(index2) 5526 5527 if not this: 5528 if self._match_text_seq("UNSIGNED"): 5529 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5530 if not unsigned_type_token: 5531 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5532 5533 type_token = unsigned_type_token or type_token 5534 5535 # NULLABLE without parentheses can be a column (Presto/Trino) 5536 if type_token == 
TokenType.NULLABLE and not expressions: 5537 self._retreat(index) 5538 return None 5539 5540 this = exp.DataType( 5541 this=exp.DataType.Type[type_token.value], 5542 expressions=expressions, 5543 nested=nested, 5544 prefix=prefix, 5545 ) 5546 5547 # Empty arrays/structs are allowed 5548 if values is not None: 5549 cls = exp.Struct if is_struct else exp.Array 5550 this = exp.cast(cls(expressions=values), this, copy=False) 5551 5552 elif expressions: 5553 this.set("expressions", expressions) 5554 5555 # https://materialize.com/docs/sql/types/list/#type-name 5556 while self._match(TokenType.LIST): 5557 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5558 5559 index = self._index 5560 5561 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5562 matched_array = self._match(TokenType.ARRAY) 5563 5564 while self._curr: 5565 datatype_token = self._prev.token_type 5566 matched_l_bracket = self._match(TokenType.L_BRACKET) 5567 5568 if (not matched_l_bracket and not matched_array) or ( 5569 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5570 ): 5571 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5572 # not to be confused with the fixed size array parsing 5573 break 5574 5575 matched_array = False 5576 values = self._parse_csv(self._parse_assignment) or None 5577 if ( 5578 values 5579 and not schema 5580 and ( 5581 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5582 ) 5583 ): 5584 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5585 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5586 self._retreat(index) 5587 break 5588 5589 this = exp.DataType( 5590 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5591 ) 5592 self._match(TokenType.R_BRACKET) 5593 5594 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5595 converter = self.TYPE_CONVERTERS.get(this.this) 5596 if converter: 5597 this = converter(t.cast(exp.DataType, this)) 5598 5599 return this 5600 5601 def _parse_vector_expressions( 5602 self, expressions: t.List[exp.Expression] 5603 ) -> t.List[exp.Expression]: 5604 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 5605 5606 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5607 index = self._index 5608 5609 if ( 5610 self._curr 5611 and self._next 5612 and self._curr.token_type in self.TYPE_TOKENS 5613 and self._next.token_type in self.TYPE_TOKENS 5614 ): 5615 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5616 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5617 this = self._parse_id_var() 5618 else: 5619 this = ( 5620 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5621 or self._parse_id_var() 5622 ) 5623 5624 self._match(TokenType.COLON) 5625 5626 if ( 5627 type_required 5628 and not isinstance(this, exp.DataType) 5629 and not self._match_set(self.TYPE_TOKENS, advance=False) 5630 ): 5631 self._retreat(index) 5632 return self._parse_types() 5633 5634 return self._parse_column_def(this) 5635 5636 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5637 if not self._match_text_seq("AT", "TIME", "ZONE"): 5638 return this 5639 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5640 5641 def _parse_column(self) -> t.Optional[exp.Expression]: 5642 this = self._parse_column_reference() 5643 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5644 5645 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5646 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5647 5648 return column 5649 5650 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5651 this = self._parse_field() 5652 if ( 5653 not this 5654 and self._match(TokenType.VALUES, advance=False) 5655 and self.VALUES_FOLLOWED_BY_PAREN 5656 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5657 ): 5658 this = self._parse_id_var() 5659 5660 if isinstance(this, exp.Identifier): 5661 # We bubble up comments from the Identifier to the Column 5662 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5663 5664 return this 5665 5666 def _parse_colon_as_variant_extract( 5667 self, this: t.Optional[exp.Expression] 5668 ) -> t.Optional[exp.Expression]: 5669 casts = [] 5670 json_path = [] 5671 escape = None 5672 5673 while self._match(TokenType.COLON): 5674 start_index = self._index 5675 5676 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5677 path = self._parse_column_ops( 5678 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5679 ) 5680 5681 # The cast :: operator has a lower precedence than the extraction operator :, so 5682 # we rearrange the AST appropriately to avoid casting the JSON path 5683 while isinstance(path, exp.Cast): 5684 casts.append(path.to) 5685 path = path.this 5686 5687 if casts: 5688 dcolon_offset = next( 5689 i 5690 for i, t in enumerate(self._tokens[start_index:]) 5691 if t.token_type == TokenType.DCOLON 5692 ) 5693 end_token = self._tokens[start_index + dcolon_offset - 1] 5694 else: 5695 end_token = self._prev 5696 5697 if path: 5698 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5699 # it'll roundtrip to a string literal in GET_PATH 5700 if isinstance(path, exp.Identifier) and path.quoted: 5701 escape = True 5702 5703 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5704 5705 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5706 # Databricks transforms it back to the colon/dot notation 5707 if json_path: 5708 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5709 5710 if json_path_expr: 5711 json_path_expr.set("escape", escape) 5712 5713 this = self.expression( 5714 exp.JSONExtract, 5715 this=this, 5716 expression=json_path_expr, 5717 variant_extract=True, 5718 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5719 ) 5720 5721 while casts: 5722 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5723 5724 return this 5725 5726 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5727 return self._parse_types() 5728 5729 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5730 this = self._parse_bracket(this) 5731 5732 while self._match_set(self.COLUMN_OPERATORS): 5733 op_token = self._prev.token_type 5734 op = self.COLUMN_OPERATORS.get(op_token) 5735 5736 if op_token in self.CAST_COLUMN_OPERATORS: 5737 field = self._parse_dcolon() 5738 if not field: 5739 self.raise_error("Expected type") 5740 elif op and self._curr: 5741 field = self._parse_column_reference() or self._parse_bitwise() 5742 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5743 field = self._parse_column_ops(field) 5744 else: 5745 field = self._parse_field(any_token=True, anonymous_func=True) 5746 5747 # Function calls can be qualified, e.g., x.y.FOO() 5748 # This converts the final AST to a series of Dots leading to the function call 5749 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5750 if isinstance(field, (exp.Func, exp.Window)) and this: 5751 this = this.transform( 5752 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5753 ) 5754 5755 if op: 5756 this = op(self, this, field) 5757 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5758 this = self.expression( 5759 exp.Column, 5760 comments=this.comments, 5761 this=field, 5762 table=this.this, 5763 db=this.args.get("table"), 5764 catalog=this.args.get("db"), 5765 ) 5766 elif isinstance(field, exp.Window): 5767 # Move the exp.Dot's to the window's function 5768 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5769 field.set("this", window_func) 5770 this = field 5771 else: 5772 this = self.expression(exp.Dot, this=this, expression=field) 5773 5774 if field and field.comments: 5775 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5776 5777 this = self._parse_bracket(this) 5778 5779 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5780 5781 def _parse_paren(self) -> t.Optional[exp.Expression]: 5782 if not self._match(TokenType.L_PAREN): 5783 return None 5784 5785 comments = self._prev_comments 5786 query = self._parse_select() 5787 5788 if query: 5789 expressions = [query] 5790 else: 5791 expressions = self._parse_expressions() 5792 5793 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5794 5795 if not this and self._match(TokenType.R_PAREN, advance=False): 5796 this = self.expression(exp.Tuple) 5797 elif isinstance(this, 
exp.UNWRAPPED_QUERIES): 5798 this = self._parse_subquery(this=this, parse_alias=False) 5799 elif isinstance(this, exp.Subquery): 5800 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5801 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5802 this = self.expression(exp.Tuple, expressions=expressions) 5803 else: 5804 this = self.expression(exp.Paren, this=this) 5805 5806 if this: 5807 this.add_comments(comments) 5808 5809 self._match_r_paren(expression=this) 5810 5811 if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc): 5812 return self._parse_window(this) 5813 5814 return this 5815 5816 def _parse_primary(self) -> t.Optional[exp.Expression]: 5817 if self._match_set(self.PRIMARY_PARSERS): 5818 token_type = self._prev.token_type 5819 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5820 5821 if token_type == TokenType.STRING: 5822 expressions = [primary] 5823 while self._match(TokenType.STRING): 5824 expressions.append(exp.Literal.string(self._prev.text)) 5825 5826 if len(expressions) > 1: 5827 return self.expression(exp.Concat, expressions=expressions) 5828 5829 return primary 5830 5831 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5832 return exp.Literal.number(f"0.{self._prev.text}") 5833 5834 return self._parse_paren() 5835 5836 def _parse_field( 5837 self, 5838 any_token: bool = False, 5839 tokens: t.Optional[t.Collection[TokenType]] = None, 5840 anonymous_func: bool = False, 5841 ) -> t.Optional[exp.Expression]: 5842 if anonymous_func: 5843 field = ( 5844 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5845 or self._parse_primary() 5846 ) 5847 else: 5848 field = self._parse_primary() or self._parse_function( 5849 anonymous=anonymous_func, any_token=any_token 5850 ) 5851 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5852 5853 def _parse_function( 5854 self, 5855 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5856 anonymous: bool = False, 5857 optional_parens: bool = True, 5858 any_token: bool = False, 5859 ) -> t.Optional[exp.Expression]: 5860 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5861 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5862 fn_syntax = False 5863 if ( 5864 self._match(TokenType.L_BRACE, advance=False) 5865 and self._next 5866 and self._next.text.upper() == "FN" 5867 ): 5868 self._advance(2) 5869 fn_syntax = True 5870 5871 func = self._parse_function_call( 5872 functions=functions, 5873 anonymous=anonymous, 5874 optional_parens=optional_parens, 5875 any_token=any_token, 5876 ) 5877 5878 if fn_syntax: 5879 self._match(TokenType.R_BRACE) 5880 5881 return func 5882 5883 def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]: 5884 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5885 5886 def _parse_function_call( 5887 self, 5888 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5889 anonymous: bool = False, 5890 optional_parens: bool = True, 5891 any_token: bool = False, 5892 ) -> t.Optional[exp.Expression]: 5893 if not self._curr: 5894 return None 5895 5896 comments = self._curr.comments 5897 prev = self._prev 5898 token = self._curr 5899 token_type = self._curr.token_type 5900 this = self._curr.text 5901 upper = this.upper() 5902 5903 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5904 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5905 self._advance() 5906 return 
self._parse_window(parser(self)) 5907 5908 if not self._next or self._next.token_type != TokenType.L_PAREN: 5909 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5910 self._advance() 5911 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5912 5913 return None 5914 5915 if any_token: 5916 if token_type in self.RESERVED_TOKENS: 5917 return None 5918 elif token_type not in self.FUNC_TOKENS: 5919 return None 5920 5921 self._advance(2) 5922 5923 parser = self.FUNCTION_PARSERS.get(upper) 5924 if parser and not anonymous: 5925 this = parser(self) 5926 else: 5927 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5928 5929 if subquery_predicate: 5930 expr = None 5931 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5932 expr = self._parse_select() 5933 self._match_r_paren() 5934 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5935 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5936 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5937 self._advance(-1) 5938 expr = self._parse_bitwise() 5939 5940 if expr: 5941 return self.expression(subquery_predicate, comments=comments, this=expr) 5942 5943 if functions is None: 5944 functions = self.FUNCTIONS 5945 5946 function = functions.get(upper) 5947 known_function = function and not anonymous 5948 5949 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5950 args = self._parse_function_args(alias) 5951 5952 post_func_comments = self._curr and self._curr.comments 5953 if known_function and post_func_comments: 5954 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5955 # call we'll construct it as exp.Anonymous, even if it's "known" 5956 if any( 5957 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5958 for comment in post_func_comments 5959 ): 5960 known_function = False 5961 5962 if alias and known_function: 5963 args = self._kv_to_prop_eq(args) 5964 5965 if known_function: 5966 func_builder = t.cast(t.Callable, function) 5967 5968 if "dialect" in func_builder.__code__.co_varnames: 5969 func = func_builder(args, dialect=self.dialect) 5970 else: 5971 func = func_builder(args) 5972 5973 func = self.validate_expression(func, args) 5974 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5975 func.meta["name"] = this 5976 5977 this = func 5978 else: 5979 if token_type == TokenType.IDENTIFIER: 5980 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5981 5982 this = self.expression(exp.Anonymous, this=this, expressions=args) 5983 this = this.update_positions(token) 5984 5985 if isinstance(this, exp.Expression): 5986 this.add_comments(comments) 5987 5988 self._match_r_paren(this) 5989 return self._parse_window(this) 5990 5991 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5992 return expression 5993 5994 def _kv_to_prop_eq( 5995 self, expressions: t.List[exp.Expression], parse_map: bool = False 5996 ) -> t.List[exp.Expression]: 5997 transformed = [] 5998 5999 for index, e in enumerate(expressions): 6000 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 6001 if isinstance(e, exp.Alias): 6002 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 6003 6004 if not isinstance(e, exp.PropertyEQ): 6005 e = self.expression( 6006 exp.PropertyEQ, 6007 this=e.this if parse_map else exp.to_identifier(e.this.name), 6008 expression=e.expression, 6009 ) 6010 6011 if isinstance(e.this, exp.Column): 6012 e.this.replace(e.this.this) 
6013 else: 6014 e = self._to_prop_eq(e, index) 6015 6016 transformed.append(e) 6017 6018 return transformed 6019 6020 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 6021 return self._parse_statement() 6022 6023 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 6024 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 6025 6026 def _parse_user_defined_function( 6027 self, kind: t.Optional[TokenType] = None 6028 ) -> t.Optional[exp.Expression]: 6029 this = self._parse_table_parts(schema=True) 6030 6031 if not self._match(TokenType.L_PAREN): 6032 return this 6033 6034 expressions = self._parse_csv(self._parse_function_parameter) 6035 self._match_r_paren() 6036 return self.expression( 6037 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 6038 ) 6039 6040 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 6041 literal = self._parse_primary() 6042 if literal: 6043 return self.expression(exp.Introducer, this=token.text, expression=literal) 6044 6045 return self._identifier_expression(token) 6046 6047 def _parse_session_parameter(self) -> exp.SessionParameter: 6048 kind = None 6049 this = self._parse_id_var() or self._parse_primary() 6050 6051 if this and self._match(TokenType.DOT): 6052 kind = this.name 6053 this = self._parse_var() or self._parse_primary() 6054 6055 return self.expression(exp.SessionParameter, this=this, kind=kind) 6056 6057 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 6058 return self._parse_id_var() 6059 6060 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 6061 index = self._index 6062 6063 if self._match(TokenType.L_PAREN): 6064 expressions = t.cast( 6065 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 6066 ) 6067 6068 if not self._match(TokenType.R_PAREN): 6069 self._retreat(index) 6070 else: 6071 expressions = [self._parse_lambda_arg()] 6072 6073 if self._match_set(self.LAMBDAS): 6074 return self.LAMBDAS[self._prev.token_type](self, expressions) 6075 6076 self._retreat(index) 6077 6078 this: t.Optional[exp.Expression] 6079 6080 if self._match(TokenType.DISTINCT): 6081 this = self.expression( 6082 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 6083 ) 6084 else: 6085 this = self._parse_select_or_expression(alias=alias) 6086 6087 return self._parse_limit( 6088 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6089 ) 6090 6091 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6092 index = self._index 6093 if not self._match(TokenType.L_PAREN): 6094 return this 6095 6096 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6097 # expr can be of both types 6098 if self._match_set(self.SELECT_START_TOKENS): 6099 self._retreat(index) 6100 return this 6101 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6102 self._match_r_paren() 6103 return self.expression(exp.Schema, this=this, expressions=args) 6104 6105 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6106 return self._parse_column_def(self._parse_field(any_token=True)) 6107 6108 def _parse_column_def( 6109 self, this: t.Optional[exp.Expression], computed_column: bool = True 6110 ) -> t.Optional[exp.Expression]: 6111 # column defs are not really columns, they're identifiers 6112 if isinstance(this, exp.Column): 6113 this = this.this 6114 6115 if not computed_column: 6116 self._match(TokenType.ALIAS) 6117 6118 kind = self._parse_types(schema=True) 6119 6120 if self._match_text_seq("FOR", "ORDINALITY"): 6121 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6122 6123 constraints: t.List[exp.Expression] = [] 6124 6125 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6126 ("ALIAS", "MATERIALIZED") 6127 ): 6128 persisted = self._prev.text.upper() == "MATERIALIZED" 6129 constraint_kind = exp.ComputedColumnConstraint( 6130 this=self._parse_assignment(), 6131 persisted=persisted or self._match_text_seq("PERSISTED"), 6132 data_type=exp.Var(this="AUTO") 6133 if self._match_text_seq("AUTO") 6134 else self._parse_types(), 6135 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6136 ) 6137 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6138 elif ( 6139 kind 6140 and self._match(TokenType.ALIAS, advance=False) 6141 and ( 6142 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6143 or (self._next and self._next.token_type == TokenType.L_PAREN) 6144 ) 6145 ): 6146 self._advance() 6147 constraints.append( 6148 self.expression( 6149 exp.ColumnConstraint, 6150 kind=exp.ComputedColumnConstraint( 6151 this=self._parse_disjunction(), 6152 persisted=self._match_texts(("STORED", "VIRTUAL")) 6153 and self._prev.text.upper() == "STORED", 6154 ), 6155 ) 6156 ) 6157 6158 while True: 6159 constraint = self._parse_column_constraint() 6160 if not constraint: 6161 break 6162 constraints.append(constraint) 6163 6164 if not kind and not constraints: 6165 return this 6166 6167 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6168 6169 def _parse_auto_increment( 6170 self, 6171 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6172 start = None 6173 increment = None 6174 order = None 6175 6176 if self._match(TokenType.L_PAREN, advance=False): 6177 args = self._parse_wrapped_csv(self._parse_bitwise) 6178 start = seq_get(args, 0) 6179 increment = seq_get(args, 1) 6180 elif self._match_text_seq("START"): 6181 start = self._parse_bitwise() 6182 self._match_text_seq("INCREMENT") 6183 increment = self._parse_bitwise() 6184 if self._match_text_seq("ORDER"): 6185 order = True 6186 elif self._match_text_seq("NOORDER"): 6187 order = False 6188 6189 if start and increment: 6190 return exp.GeneratedAsIdentityColumnConstraint( 6191 start=start, increment=increment, this=False, order=order 6192 ) 6193 6194 return exp.AutoIncrementColumnConstraint() 6195 6196 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6197 if not self._match_text_seq("REFRESH"): 6198 self._retreat(self._index - 1) 6199 return None 6200 return self.expression(exp.AutoRefreshProperty, 
this=self._parse_var(upper=True)) 6201 6202 def _parse_compress(self) -> exp.CompressColumnConstraint: 6203 if self._match(TokenType.L_PAREN, advance=False): 6204 return self.expression( 6205 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6206 ) 6207 6208 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6209 6210 def _parse_generated_as_identity( 6211 self, 6212 ) -> ( 6213 exp.GeneratedAsIdentityColumnConstraint 6214 | exp.ComputedColumnConstraint 6215 | exp.GeneratedAsRowColumnConstraint 6216 ): 6217 if self._match_text_seq("BY", "DEFAULT"): 6218 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6219 this = self.expression( 6220 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6221 ) 6222 else: 6223 self._match_text_seq("ALWAYS") 6224 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6225 6226 self._match(TokenType.ALIAS) 6227 6228 if self._match_text_seq("ROW"): 6229 start = self._match_text_seq("START") 6230 if not start: 6231 self._match(TokenType.END) 6232 hidden = self._match_text_seq("HIDDEN") 6233 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6234 6235 identity = self._match_text_seq("IDENTITY") 6236 6237 if self._match(TokenType.L_PAREN): 6238 if self._match(TokenType.START_WITH): 6239 this.set("start", self._parse_bitwise()) 6240 if self._match_text_seq("INCREMENT", "BY"): 6241 this.set("increment", self._parse_bitwise()) 6242 if self._match_text_seq("MINVALUE"): 6243 this.set("minvalue", self._parse_bitwise()) 6244 if self._match_text_seq("MAXVALUE"): 6245 this.set("maxvalue", self._parse_bitwise()) 6246 6247 if self._match_text_seq("CYCLE"): 6248 this.set("cycle", True) 6249 elif self._match_text_seq("NO", "CYCLE"): 6250 this.set("cycle", False) 6251 6252 if not identity: 6253 this.set("expression", self._parse_range()) 6254 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6255 args = self._parse_csv(self._parse_bitwise) 6256 this.set("start", seq_get(args, 0)) 6257 this.set("increment", seq_get(args, 1)) 6258 6259 self._match_r_paren() 6260 6261 return this 6262 6263 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6264 self._match_text_seq("LENGTH") 6265 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6266 6267 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6268 if self._match_text_seq("NULL"): 6269 return self.expression(exp.NotNullColumnConstraint) 6270 if self._match_text_seq("CASESPECIFIC"): 6271 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6272 if self._match_text_seq("FOR", "REPLICATION"): 6273 return self.expression(exp.NotForReplicationColumnConstraint) 6274 6275 # Unconsume the `NOT` token 6276 self._retreat(self._index - 1) 6277 return None 6278 6279 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6280 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6281 6282 procedure_option_follows = ( 6283 self._match(TokenType.WITH, advance=False) 6284 and self._next 6285 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6286 ) 6287 6288 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6289 return self.expression( 6290 exp.ColumnConstraint, 6291 this=this, 6292 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6293 ) 6294 6295 return this 6296 6297 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6298 if not 
self._match(TokenType.CONSTRAINT): 6299 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6300 6301 return self.expression( 6302 exp.Constraint, 6303 this=self._parse_id_var(), 6304 expressions=self._parse_unnamed_constraints(), 6305 ) 6306 6307 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6308 constraints = [] 6309 while True: 6310 constraint = self._parse_unnamed_constraint() or self._parse_function() 6311 if not constraint: 6312 break 6313 constraints.append(constraint) 6314 6315 return constraints 6316 6317 def _parse_unnamed_constraint( 6318 self, constraints: t.Optional[t.Collection[str]] = None 6319 ) -> t.Optional[exp.Expression]: 6320 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6321 constraints or self.CONSTRAINT_PARSERS 6322 ): 6323 return None 6324 6325 constraint = self._prev.text.upper() 6326 if constraint not in self.CONSTRAINT_PARSERS: 6327 self.raise_error(f"No parser found for schema constraint {constraint}.") 6328 6329 return self.CONSTRAINT_PARSERS[constraint](self) 6330 6331 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6332 return self._parse_id_var(any_token=False) 6333 6334 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6335 self._match_texts(("KEY", "INDEX")) 6336 return self.expression( 6337 exp.UniqueColumnConstraint, 6338 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6339 this=self._parse_schema(self._parse_unique_key()), 6340 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6341 on_conflict=self._parse_on_conflict(), 6342 options=self._parse_key_constraint_options(), 6343 ) 6344 6345 def _parse_key_constraint_options(self) -> t.List[str]: 6346 options = [] 6347 while True: 6348 if not self._curr: 6349 break 6350 6351 if self._match(TokenType.ON): 6352 action = None 6353 on = self._advance_any() and self._prev.text 6354 6355 if self._match_text_seq("NO", "ACTION"): 6356 action = "NO ACTION" 6357 elif self._match_text_seq("CASCADE"): 6358 action = "CASCADE" 6359 elif self._match_text_seq("RESTRICT"): 6360 action = "RESTRICT" 6361 elif self._match_pair(TokenType.SET, TokenType.NULL): 6362 action = "SET NULL" 6363 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6364 action = "SET DEFAULT" 6365 else: 6366 self.raise_error("Invalid key constraint") 6367 6368 options.append(f"ON {on} {action}") 6369 else: 6370 var = self._parse_var_from_options( 6371 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6372 ) 6373 if not var: 6374 break 6375 options.append(var.name) 6376 6377 return options 6378 6379 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6380 if match and not self._match(TokenType.REFERENCES): 6381 return None 6382 6383 expressions = None 6384 this = self._parse_table(schema=True) 6385 options = self._parse_key_constraint_options() 6386 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6387 6388 def _parse_foreign_key(self) -> exp.ForeignKey: 6389 expressions = ( 6390 self._parse_wrapped_id_vars() 6391 if not self._match(TokenType.REFERENCES, advance=False) 6392 else None 6393 ) 6394 reference = self._parse_references() 6395 on_options = {} 6396 6397 while self._match(TokenType.ON): 6398 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6399 self.raise_error("Expected DELETE or UPDATE") 6400 6401 kind = self._prev.text.lower() 6402 6403 if self._match_text_seq("NO", "ACTION"): 6404 action = "NO ACTION" 6405 elif 
self._match(TokenType.SET): 6406 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6407 action = "SET " + self._prev.text.upper() 6408 else: 6409 self._advance() 6410 action = self._prev.text.upper() 6411 6412 on_options[kind] = action 6413 6414 return self.expression( 6415 exp.ForeignKey, 6416 expressions=expressions, 6417 reference=reference, 6418 options=self._parse_key_constraint_options(), 6419 **on_options, # type: ignore 6420 ) 6421 6422 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6423 return self._parse_ordered() or self._parse_field() 6424 6425 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6426 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6427 self._retreat(self._index - 1) 6428 return None 6429 6430 id_vars = self._parse_wrapped_id_vars() 6431 return self.expression( 6432 exp.PeriodForSystemTimeConstraint, 6433 this=seq_get(id_vars, 0), 6434 expression=seq_get(id_vars, 1), 6435 ) 6436 6437 def _parse_primary_key( 6438 self, wrapped_optional: bool = False, in_props: bool = False 6439 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6440 desc = ( 6441 self._match_set((TokenType.ASC, TokenType.DESC)) 6442 and self._prev.token_type == TokenType.DESC 6443 ) 6444 6445 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6446 return self.expression( 6447 exp.PrimaryKeyColumnConstraint, 6448 desc=desc, 6449 options=self._parse_key_constraint_options(), 6450 ) 6451 6452 expressions = self._parse_wrapped_csv( 6453 self._parse_primary_key_part, optional=wrapped_optional 6454 ) 6455 6456 return self.expression( 6457 exp.PrimaryKey, 6458 expressions=expressions, 6459 include=self._parse_index_params(), 6460 options=self._parse_key_constraint_options(), 6461 ) 6462 6463 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6464 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6465 6466 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6467 """ 6468 Parses a datetime column in ODBC format. We parse the column into the corresponding 6469 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6470 same as we did for `DATE('yyyy-mm-dd')`. 
6471 6472 Reference: 6473 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6474 """ 6475 self._match(TokenType.VAR) 6476 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6477 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6478 if not self._match(TokenType.R_BRACE): 6479 self.raise_error("Expected }") 6480 return expression 6481 6482 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6483 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6484 return this 6485 6486 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6487 map_token = seq_get(self._tokens, self._index - 2) 6488 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6489 else: 6490 parse_map = False 6491 6492 bracket_kind = self._prev.token_type 6493 if ( 6494 bracket_kind == TokenType.L_BRACE 6495 and self._curr 6496 and self._curr.token_type == TokenType.VAR 6497 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6498 ): 6499 return self._parse_odbc_datetime_literal() 6500 6501 expressions = self._parse_csv( 6502 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6503 ) 6504 6505 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6506 self.raise_error("Expected ]") 6507 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6508 self.raise_error("Expected }") 6509 6510 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6511 if bracket_kind == TokenType.L_BRACE: 6512 this = self.expression( 6513 exp.Struct, 6514 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6515 ) 6516 elif not this: 6517 this = build_array_constructor( 6518 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6519 ) 6520 else: 6521 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6522 if constructor_type: 6523 return build_array_constructor( 6524 constructor_type, 6525 args=expressions, 6526 bracket_kind=bracket_kind, 6527 dialect=self.dialect, 6528 ) 6529 6530 expressions = apply_index_offset( 6531 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6532 ) 6533 this = self.expression( 6534 exp.Bracket, 6535 this=this, 6536 expressions=expressions, 6537 comments=this.pop_comments(), 6538 ) 6539 6540 self._add_comments(this) 6541 return self._parse_bracket(this) 6542 6543 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6544 if self._match(TokenType.COLON): 6545 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6546 return this 6547 6548 def _parse_case(self) -> t.Optional[exp.Expression]: 6549 if self._match(TokenType.DOT, advance=False): 6550 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 6551 self._retreat(self._index - 1) 6552 return None 6553 6554 ifs = [] 6555 default = None 6556 6557 comments = self._prev_comments 6558 expression = self._parse_assignment() 6559 6560 while self._match(TokenType.WHEN): 6561 this = self._parse_assignment() 6562 self._match(TokenType.THEN) 6563 then = self._parse_assignment() 6564 ifs.append(self.expression(exp.If, this=this, true=then)) 6565 6566 if self._match(TokenType.ELSE): 6567 default = self._parse_assignment() 6568 6569 if not self._match(TokenType.END): 6570 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6571 default 
= exp.column("interval") 6572 else: 6573 self.raise_error("Expected END after CASE", self._prev) 6574 6575 return self.expression( 6576 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6577 ) 6578 6579 def _parse_if(self) -> t.Optional[exp.Expression]: 6580 if self._match(TokenType.L_PAREN): 6581 args = self._parse_csv( 6582 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6583 ) 6584 this = self.validate_expression(exp.If.from_arg_list(args), args) 6585 self._match_r_paren() 6586 else: 6587 index = self._index - 1 6588 6589 if self.NO_PAREN_IF_COMMANDS and index == 0: 6590 return self._parse_as_command(self._prev) 6591 6592 condition = self._parse_assignment() 6593 6594 if not condition: 6595 self._retreat(index) 6596 return None 6597 6598 self._match(TokenType.THEN) 6599 true = self._parse_assignment() 6600 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6601 self._match(TokenType.END) 6602 this = self.expression(exp.If, this=condition, true=true, false=false) 6603 6604 return this 6605 6606 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6607 if not self._match_text_seq("VALUE", "FOR"): 6608 self._retreat(self._index - 1) 6609 return None 6610 6611 return self.expression( 6612 exp.NextValueFor, 6613 this=self._parse_column(), 6614 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6615 ) 6616 6617 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6618 this = self._parse_function() or self._parse_var_or_string(upper=True) 6619 6620 if self._match(TokenType.FROM): 6621 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6622 6623 if not self._match(TokenType.COMMA): 6624 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6625 6626 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6627 6628 def _parse_gap_fill(self) -> exp.GapFill: 6629 self._match(TokenType.TABLE) 6630 this = self._parse_table() 6631 6632 self._match(TokenType.COMMA) 6633 args = [this, *self._parse_csv(self._parse_lambda)] 6634 6635 gap_fill = exp.GapFill.from_arg_list(args) 6636 return self.validate_expression(gap_fill, args) 6637 6638 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6639 this = self._parse_assignment() 6640 6641 if not self._match(TokenType.ALIAS): 6642 if self._match(TokenType.COMMA): 6643 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6644 6645 self.raise_error("Expected AS after CAST") 6646 6647 fmt = None 6648 to = self._parse_types() 6649 6650 default = self._match(TokenType.DEFAULT) 6651 if default: 6652 default = self._parse_bitwise() 6653 self._match_text_seq("ON", "CONVERSION", "ERROR") 6654 6655 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6656 fmt_string = self._parse_string() 6657 fmt = self._parse_at_time_zone(fmt_string) 6658 6659 if not to: 6660 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6661 if to.this in exp.DataType.TEMPORAL_TYPES: 6662 this = self.expression( 6663 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6664 this=this, 6665 format=exp.Literal.string( 6666 format_time( 6667 fmt_string.this if fmt_string else "", 6668 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6669 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6670 ) 6671 ), 6672 safe=safe, 6673 ) 6674 6675 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6676 this.set("zone", 
fmt.args["zone"]) 6677 return this 6678 elif not to: 6679 self.raise_error("Expected TYPE after CAST") 6680 elif isinstance(to, exp.Identifier): 6681 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6682 elif to.this == exp.DataType.Type.CHAR: 6683 if self._match(TokenType.CHARACTER_SET): 6684 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6685 6686 return self.build_cast( 6687 strict=strict, 6688 this=this, 6689 to=to, 6690 format=fmt, 6691 safe=safe, 6692 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6693 default=default, 6694 ) 6695 6696 def _parse_string_agg(self) -> exp.GroupConcat: 6697 if self._match(TokenType.DISTINCT): 6698 args: t.List[t.Optional[exp.Expression]] = [ 6699 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6700 ] 6701 if self._match(TokenType.COMMA): 6702 args.extend(self._parse_csv(self._parse_assignment)) 6703 else: 6704 args = self._parse_csv(self._parse_assignment) # type: ignore 6705 6706 if self._match_text_seq("ON", "OVERFLOW"): 6707 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6708 if self._match_text_seq("ERROR"): 6709 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6710 else: 6711 self._match_text_seq("TRUNCATE") 6712 on_overflow = self.expression( 6713 exp.OverflowTruncateBehavior, 6714 this=self._parse_string(), 6715 with_count=( 6716 self._match_text_seq("WITH", "COUNT") 6717 or not self._match_text_seq("WITHOUT", "COUNT") 6718 ), 6719 ) 6720 else: 6721 on_overflow = None 6722 6723 index = self._index 6724 if not self._match(TokenType.R_PAREN) and args: 6725 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6726 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6727 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6728 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6729 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6730 6731 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6732 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6733 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
6734 if not self._match_text_seq("WITHIN", "GROUP"): 6735 self._retreat(index) 6736 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6737 6738 # The corresponding match_r_paren will be called in parse_function (caller) 6739 self._match_l_paren() 6740 6741 return self.expression( 6742 exp.GroupConcat, 6743 this=self._parse_order(this=seq_get(args, 0)), 6744 separator=seq_get(args, 1), 6745 on_overflow=on_overflow, 6746 ) 6747 6748 def _parse_convert( 6749 self, strict: bool, safe: t.Optional[bool] = None 6750 ) -> t.Optional[exp.Expression]: 6751 this = self._parse_bitwise() 6752 6753 if self._match(TokenType.USING): 6754 to: t.Optional[exp.Expression] = self.expression( 6755 exp.CharacterSet, this=self._parse_var() 6756 ) 6757 elif self._match(TokenType.COMMA): 6758 to = self._parse_types() 6759 else: 6760 to = None 6761 6762 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6763 6764 def _parse_xml_table(self) -> exp.XMLTable: 6765 namespaces = None 6766 passing = None 6767 columns = None 6768 6769 if self._match_text_seq("XMLNAMESPACES", "("): 6770 namespaces = self._parse_xml_namespace() 6771 self._match_text_seq(")", ",") 6772 6773 this = self._parse_string() 6774 6775 if self._match_text_seq("PASSING"): 6776 # The BY VALUE keywords are optional and are provided for semantic clarity 6777 self._match_text_seq("BY", "VALUE") 6778 passing = self._parse_csv(self._parse_column) 6779 6780 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6781 6782 if self._match_text_seq("COLUMNS"): 6783 columns = self._parse_csv(self._parse_field_def) 6784 6785 return self.expression( 6786 exp.XMLTable, 6787 this=this, 6788 namespaces=namespaces, 6789 passing=passing, 6790 columns=columns, 6791 by_ref=by_ref, 6792 ) 6793 6794 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6795 namespaces = [] 6796 6797 while True: 6798 if self._match(TokenType.DEFAULT): 6799 uri = self._parse_string() 6800 else: 6801 uri = self._parse_alias(self._parse_string()) 6802 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6803 if not self._match(TokenType.COMMA): 6804 break 6805 6806 return namespaces 6807 6808 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6809 args = self._parse_csv(self._parse_assignment) 6810 6811 if len(args) < 3: 6812 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6813 6814 return self.expression(exp.DecodeCase, expressions=args) 6815 6816 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6817 self._match_text_seq("KEY") 6818 key = self._parse_column() 6819 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6820 self._match_text_seq("VALUE") 6821 value = self._parse_bitwise() 6822 6823 if not key and not value: 6824 return None 6825 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6826 6827 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6828 if not this or not self._match_text_seq("FORMAT", "JSON"): 6829 return this 6830 6831 return self.expression(exp.FormatJson, this=this) 6832 6833 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6834 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6835 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6836 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6837 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6838 else: 6839 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6840 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6841 6842 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6843 6844 if not empty and not error and not null: 6845 return None 6846 6847 return self.expression( 6848 exp.OnCondition, 6849 empty=empty, 6850 error=error, 6851 null=null, 6852 ) 6853 6854 def _parse_on_handling( 6855 self, on: str, *values: str 6856 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6857 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6858 for value in values: 6859 if self._match_text_seq(value, "ON", on): 6860 return f"{value} ON {on}" 6861 6862 index = self._index 6863 if self._match(TokenType.DEFAULT): 6864 default_value = self._parse_bitwise() 6865 if self._match_text_seq("ON", on): 6866 return default_value 6867 6868 self._retreat(index) 6869 6870 return None 6871 6872 @t.overload 6873 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6874 6875 @t.overload 6876 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6877 6878 def _parse_json_object(self, agg=False): 6879 star = self._parse_star() 6880 expressions = ( 6881 [star] 6882 if star 6883 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6884 ) 6885 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6886 6887 unique_keys = None 6888 if self._match_text_seq("WITH", "UNIQUE"): 6889 unique_keys = True 6890 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6891 unique_keys = False 6892 6893 self._match_text_seq("KEYS") 6894 6895 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6896 self._parse_type() 6897 ) 6898 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6899 6900 return self.expression( 6901 exp.JSONObjectAgg if agg else exp.JSONObject, 6902 expressions=expressions, 6903 null_handling=null_handling, 6904 unique_keys=unique_keys, 6905 return_type=return_type, 6906 encoding=encoding, 6907 ) 6908 6909 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6910 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6911 if not self._match_text_seq("NESTED"): 6912 this = self._parse_id_var() 6913 kind = self._parse_types(allow_identifiers=False) 6914 nested = None 6915 else: 6916 this = None 6917 kind = None 6918 nested = True 6919 6920 path = self._match_text_seq("PATH") and self._parse_string() 6921 nested_schema = nested and self._parse_json_schema() 6922 6923 return self.expression( 6924 exp.JSONColumnDef, 6925 this=this, 6926 kind=kind, 6927 path=path, 6928 nested_schema=nested_schema, 6929 ) 6930 6931 def _parse_json_schema(self) -> exp.JSONSchema: 6932 self._match_text_seq("COLUMNS") 6933 return self.expression( 6934 exp.JSONSchema, 6935 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6936 ) 6937 6938 def _parse_json_table(self) -> exp.JSONTable: 6939 this = self._parse_format_json(self._parse_bitwise()) 6940 path = self._match(TokenType.COMMA) and self._parse_string() 6941 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6942 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6943 schema = 
self._parse_json_schema() 6944 6945 return exp.JSONTable( 6946 this=this, 6947 schema=schema, 6948 path=path, 6949 error_handling=error_handling, 6950 empty_handling=empty_handling, 6951 ) 6952 6953 def _parse_match_against(self) -> exp.MatchAgainst: 6954 if self._match_text_seq("TABLE"): 6955 # parse SingleStore MATCH(TABLE ...) syntax 6956 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 6957 expressions = [] 6958 table = self._parse_table() 6959 if table: 6960 expressions = [table] 6961 else: 6962 expressions = self._parse_csv(self._parse_column) 6963 6964 self._match_text_seq(")", "AGAINST", "(") 6965 6966 this = self._parse_string() 6967 6968 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6969 modifier = "IN NATURAL LANGUAGE MODE" 6970 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6971 modifier = f"{modifier} WITH QUERY EXPANSION" 6972 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6973 modifier = "IN BOOLEAN MODE" 6974 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6975 modifier = "WITH QUERY EXPANSION" 6976 else: 6977 modifier = None 6978 6979 return self.expression( 6980 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6981 ) 6982 6983 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6984 def _parse_open_json(self) -> exp.OpenJSON: 6985 this = self._parse_bitwise() 6986 path = self._match(TokenType.COMMA) and self._parse_string() 6987 6988 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6989 this = self._parse_field(any_token=True) 6990 kind = self._parse_types() 6991 path = self._parse_string() 6992 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6993 6994 return self.expression( 6995 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6996 ) 6997 6998 expressions = None 6999 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 7000 self._match_l_paren() 7001 expressions = self._parse_csv(_parse_open_json_column_def) 7002 7003 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 7004 7005 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 7006 args = self._parse_csv(self._parse_bitwise) 7007 7008 if self._match(TokenType.IN): 7009 return self.expression( 7010 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 7011 ) 7012 7013 if haystack_first: 7014 haystack = seq_get(args, 0) 7015 needle = seq_get(args, 1) 7016 else: 7017 haystack = seq_get(args, 1) 7018 needle = seq_get(args, 0) 7019 7020 return self.expression( 7021 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 7022 ) 7023 7024 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 7025 args = self._parse_csv(self._parse_table) 7026 return exp.JoinHint(this=func_name.upper(), expressions=args) 7027 7028 def _parse_substring(self) -> exp.Substring: 7029 # Postgres supports the form: substring(string [from int] [for int]) 7030 # (despite being undocumented, the reverse order also works) 7031 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 7032 7033 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 7034 7035 start, length = None, None 7036 7037 while self._curr: 7038 if self._match(TokenType.FROM): 7039 start = self._parse_bitwise() 7040 elif self._match(TokenType.FOR): 7041 if not start: 7042 start = exp.Literal.number(1) 7043 length = self._parse_bitwise() 7044 
else: 7045 break 7046 7047 if start: 7048 args.append(start) 7049 if length: 7050 args.append(length) 7051 7052 return self.validate_expression(exp.Substring.from_arg_list(args), args) 7053 7054 def _parse_trim(self) -> exp.Trim: 7055 # https://www.w3resource.com/sql/character-functions/trim.php 7056 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 7057 7058 position = None 7059 collation = None 7060 expression = None 7061 7062 if self._match_texts(self.TRIM_TYPES): 7063 position = self._prev.text.upper() 7064 7065 this = self._parse_bitwise() 7066 if self._match_set((TokenType.FROM, TokenType.COMMA)): 7067 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 7068 expression = self._parse_bitwise() 7069 7070 if invert_order: 7071 this, expression = expression, this 7072 7073 if self._match(TokenType.COLLATE): 7074 collation = self._parse_bitwise() 7075 7076 return self.expression( 7077 exp.Trim, this=this, position=position, expression=expression, collation=collation 7078 ) 7079 7080 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 7081 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 7082 7083 def _parse_named_window(self) -> t.Optional[exp.Expression]: 7084 return self._parse_window(self._parse_id_var(), alias=True) 7085 7086 def _parse_respect_or_ignore_nulls( 7087 self, this: t.Optional[exp.Expression] 7088 ) -> t.Optional[exp.Expression]: 7089 if self._match_text_seq("IGNORE", "NULLS"): 7090 return self.expression(exp.IgnoreNulls, this=this) 7091 if self._match_text_seq("RESPECT", "NULLS"): 7092 return self.expression(exp.RespectNulls, this=this) 7093 return this 7094 7095 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7096 if self._match(TokenType.HAVING): 7097 self._match_texts(("MAX", "MIN")) 7098 max = self._prev.text.upper() != "MIN" 7099 return self.expression( 7100 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7101 ) 7102 7103 return this 7104 7105 def _parse_window( 7106 self, this: t.Optional[exp.Expression], alias: bool = False 7107 ) -> t.Optional[exp.Expression]: 7108 func = this 7109 comments = func.comments if isinstance(func, exp.Expression) else None 7110 7111 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7112 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7113 if self._match_text_seq("WITHIN", "GROUP"): 7114 order = self._parse_wrapped(self._parse_order) 7115 this = self.expression(exp.WithinGroup, this=this, expression=order) 7116 7117 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7118 self._match(TokenType.WHERE) 7119 this = self.expression( 7120 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7121 ) 7122 self._match_r_paren() 7123 7124 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7125 # Some dialects choose to implement and some do not. 7126 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7127 7128 # There is some code above in _parse_lambda that handles 7129 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7130 7131 # The below changes handle 7132 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
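        # Illustrative example (column and table names are arbitrary):
        #   FIRST_VALUE(t.col IGNORE NULLS) OVER (ORDER BY t.ts)   -- handled in _parse_lambda
        #   FIRST_VALUE(t.col) IGNORE NULLS OVER (ORDER BY t.ts)   -- handled by the block below
        # Either way the parser ends up with exp.IgnoreNulls wrapping the aggregate, so the
        # generators only have to deal with one canonical shape.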
7133 7134 # Oracle allows both formats 7135 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7136 # and Snowflake chose to do the same for familiarity 7137 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7138 if isinstance(this, exp.AggFunc): 7139 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7140 7141 if ignore_respect and ignore_respect is not this: 7142 ignore_respect.replace(ignore_respect.this) 7143 this = self.expression(ignore_respect.__class__, this=this) 7144 7145 this = self._parse_respect_or_ignore_nulls(this) 7146 7147 # bigquery select from window x AS (partition by ...) 7148 if alias: 7149 over = None 7150 self._match(TokenType.ALIAS) 7151 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7152 return this 7153 else: 7154 over = self._prev.text.upper() 7155 7156 if comments and isinstance(func, exp.Expression): 7157 func.pop_comments() 7158 7159 if not self._match(TokenType.L_PAREN): 7160 return self.expression( 7161 exp.Window, 7162 comments=comments, 7163 this=this, 7164 alias=self._parse_id_var(False), 7165 over=over, 7166 ) 7167 7168 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7169 7170 first = self._match(TokenType.FIRST) 7171 if self._match_text_seq("LAST"): 7172 first = False 7173 7174 partition, order = self._parse_partition_and_order() 7175 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7176 7177 if kind: 7178 self._match(TokenType.BETWEEN) 7179 start = self._parse_window_spec() 7180 7181 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7182 exclude = ( 7183 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7184 if self._match_text_seq("EXCLUDE") 7185 else None 7186 ) 7187 7188 spec = self.expression( 7189 exp.WindowSpec, 7190 kind=kind, 7191 start=start["value"], 7192 start_side=start["side"], 7193 end=end.get("value"), 7194 end_side=end.get("side"), 7195 exclude=exclude, 7196 ) 7197 else: 7198 spec = None 7199 7200 self._match_r_paren() 7201 7202 window = self.expression( 7203 exp.Window, 7204 comments=comments, 7205 this=this, 7206 partition_by=partition, 7207 order=order, 7208 spec=spec, 7209 alias=window_alias, 7210 over=over, 7211 first=first, 7212 ) 7213 7214 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
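        # Illustrative Oracle-style input: MAX(col) KEEP (DENSE_RANK FIRST ORDER BY ts) OVER (PARTITION BY id).
        # The KEEP (...) part has already been parsed into `window` above; the trailing OVER (...)
        # is picked up by recursing into _parse_window with that node as `this`.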
7215 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7216 return self._parse_window(window, alias=alias) 7217 7218 return window 7219 7220 def _parse_partition_and_order( 7221 self, 7222 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7223 return self._parse_partition_by(), self._parse_order() 7224 7225 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7226 self._match(TokenType.BETWEEN) 7227 7228 return { 7229 "value": ( 7230 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7231 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7232 or self._parse_type() 7233 ), 7234 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7235 } 7236 7237 def _parse_alias( 7238 self, this: t.Optional[exp.Expression], explicit: bool = False 7239 ) -> t.Optional[exp.Expression]: 7240 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7241 # so this section tries to parse the clause version and if it fails, it treats the token 7242 # as an identifier (alias) 7243 if self._can_parse_limit_or_offset(): 7244 return this 7245 7246 any_token = self._match(TokenType.ALIAS) 7247 comments = self._prev_comments or [] 7248 7249 if explicit and not any_token: 7250 return this 7251 7252 if self._match(TokenType.L_PAREN): 7253 aliases = self.expression( 7254 exp.Aliases, 7255 comments=comments, 7256 this=this, 7257 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7258 ) 7259 self._match_r_paren(aliases) 7260 return aliases 7261 7262 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7263 self.STRING_ALIASES and self._parse_string_as_identifier() 7264 ) 7265 7266 if alias: 7267 comments.extend(alias.pop_comments()) 7268 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7269 column = this.this 7270 7271 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7272 if not this.comments and column and column.comments: 7273 this.comments = column.pop_comments() 7274 7275 return this 7276 7277 def _parse_id_var( 7278 self, 7279 any_token: bool = True, 7280 tokens: t.Optional[t.Collection[TokenType]] = None, 7281 ) -> t.Optional[exp.Expression]: 7282 expression = self._parse_identifier() 7283 if not expression and ( 7284 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7285 ): 7286 quoted = self._prev.token_type == TokenType.STRING 7287 expression = self._identifier_expression(quoted=quoted) 7288 7289 return expression 7290 7291 def _parse_string(self) -> t.Optional[exp.Expression]: 7292 if self._match_set(self.STRING_PARSERS): 7293 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7294 return self._parse_placeholder() 7295 7296 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7297 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7298 if output: 7299 output.update_positions(self._prev) 7300 return output 7301 7302 def _parse_number(self) -> t.Optional[exp.Expression]: 7303 if self._match_set(self.NUMERIC_PARSERS): 7304 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7305 return self._parse_placeholder() 7306 7307 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7308 if self._match(TokenType.IDENTIFIER): 7309 return self._identifier_expression(quoted=True) 7310 return self._parse_placeholder() 7311 7312 def _parse_var( 7313 self, 7314 any_token: bool = False, 7315 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7316 upper: bool = False, 7317 ) -> t.Optional[exp.Expression]: 7318 if ( 7319 (any_token and self._advance_any()) 7320 or self._match(TokenType.VAR) 7321 or (self._match_set(tokens) if tokens else False) 7322 ): 7323 return self.expression( 7324 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7325 ) 7326 return self._parse_placeholder() 7327 7328 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7329 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7330 self._advance() 7331 return self._prev 7332 return None 7333 7334 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7335 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7336 7337 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7338 return self._parse_primary() or self._parse_var(any_token=True) 7339 7340 def _parse_null(self) -> t.Optional[exp.Expression]: 7341 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 7342 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7343 return self._parse_placeholder() 7344 7345 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7346 if self._match(TokenType.TRUE): 7347 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7348 if self._match(TokenType.FALSE): 7349 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7350 return self._parse_placeholder() 7351 7352 def _parse_star(self) -> t.Optional[exp.Expression]: 7353 if self._match(TokenType.STAR): 7354 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7355 return self._parse_placeholder() 7356 7357 def _parse_parameter(self) -> exp.Parameter: 7358 this = self._parse_identifier() or self._parse_primary_or_var() 7359 return self.expression(exp.Parameter, this=this) 7360 7361 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7362 if self._match_set(self.PLACEHOLDER_PARSERS): 7363 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7364 if placeholder: 7365 return placeholder 7366 self._advance(-1) 7367 return None 7368 7369 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7370 if not self._match_texts(keywords): 7371 return None 7372 if self._match(TokenType.L_PAREN, advance=False): 7373 return self._parse_wrapped_csv(self._parse_expression) 7374 7375 expression = self._parse_alias(self._parse_assignment(), explicit=True) 7376 return [expression] if expression else None 7377 7378 def _parse_csv( 7379 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7380 ) -> t.List[exp.Expression]: 7381 parse_result = parse_method() 7382 items = [parse_result] if parse_result is not None else [] 7383 7384 while self._match(sep): 7385 self._add_comments(parse_result) 7386 parse_result = parse_method() 7387 if parse_result is not None: 7388 items.append(parse_result) 7389 7390 return items 7391 7392 def _parse_tokens( 7393 self, parse_method: t.Callable, expressions: t.Dict 7394 ) -> t.Optional[exp.Expression]: 7395 this = parse_method() 7396 7397 while self._match_set(expressions): 7398 this = self.expression( 7399 expressions[self._prev.token_type], 7400 this=this, 7401 comments=self._prev_comments, 7402 expression=parse_method(), 7403 ) 7404 7405 return this 7406 7407 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7408 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7409 7410 def 
_parse_wrapped_csv( 7411 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7412 ) -> t.List[exp.Expression]: 7413 return self._parse_wrapped( 7414 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7415 ) 7416 7417 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7418 wrapped = self._match(TokenType.L_PAREN) 7419 if not wrapped and not optional: 7420 self.raise_error("Expecting (") 7421 parse_result = parse_method() 7422 if wrapped: 7423 self._match_r_paren() 7424 return parse_result 7425 7426 def _parse_expressions(self) -> t.List[exp.Expression]: 7427 return self._parse_csv(self._parse_expression) 7428 7429 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7430 return ( 7431 self._parse_set_operations( 7432 self._parse_alias(self._parse_assignment(), explicit=True) 7433 if alias 7434 else self._parse_assignment() 7435 ) 7436 or self._parse_select() 7437 ) 7438 7439 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7440 return self._parse_query_modifiers( 7441 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7442 ) 7443 7444 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7445 this = None 7446 if self._match_texts(self.TRANSACTION_KIND): 7447 this = self._prev.text 7448 7449 self._match_texts(("TRANSACTION", "WORK")) 7450 7451 modes = [] 7452 while True: 7453 mode = [] 7454 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7455 mode.append(self._prev.text) 7456 7457 if mode: 7458 modes.append(" ".join(mode)) 7459 if not self._match(TokenType.COMMA): 7460 break 7461 7462 return self.expression(exp.Transaction, this=this, modes=modes) 7463 7464 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7465 chain = None 7466 savepoint = None 7467 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7468 7469 self._match_texts(("TRANSACTION", "WORK")) 7470 7471 if self._match_text_seq("TO"): 7472 self._match_text_seq("SAVEPOINT") 7473 savepoint = self._parse_id_var() 7474 7475 if self._match(TokenType.AND): 7476 chain = not self._match_text_seq("NO") 7477 self._match_text_seq("CHAIN") 7478 7479 if is_rollback: 7480 return self.expression(exp.Rollback, savepoint=savepoint) 7481 7482 return self.expression(exp.Commit, chain=chain) 7483 7484 def _parse_refresh(self) -> exp.Refresh: 7485 self._match(TokenType.TABLE) 7486 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7487 7488 def _parse_column_def_with_exists(self): 7489 start = self._index 7490 self._match(TokenType.COLUMN) 7491 7492 exists_column = self._parse_exists(not_=True) 7493 expression = self._parse_field_def() 7494 7495 if not isinstance(expression, exp.ColumnDef): 7496 self._retreat(start) 7497 return None 7498 7499 expression.set("exists", exists_column) 7500 7501 return expression 7502 7503 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7504 if not self._prev.text.upper() == "ADD": 7505 return None 7506 7507 expression = self._parse_column_def_with_exists() 7508 if not expression: 7509 return None 7510 7511 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7512 if self._match_texts(("FIRST", "AFTER")): 7513 position = self._prev.text 7514 column_position = self.expression( 7515 exp.ColumnPosition, this=self._parse_column(), position=position 7516 ) 7517 expression.set("position", column_position) 7518 7519 return 
expression 7520 7521 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7522 drop = self._match(TokenType.DROP) and self._parse_drop() 7523 if drop and not isinstance(drop, exp.Command): 7524 drop.set("kind", drop.args.get("kind", "COLUMN")) 7525 return drop 7526 7527 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7528 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7529 return self.expression( 7530 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7531 ) 7532 7533 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7534 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7535 self._match_text_seq("ADD") 7536 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7537 return self.expression( 7538 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7539 ) 7540 7541 column_def = self._parse_add_column() 7542 if isinstance(column_def, exp.ColumnDef): 7543 return column_def 7544 7545 exists = self._parse_exists(not_=True) 7546 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7547 return self.expression( 7548 exp.AddPartition, 7549 exists=exists, 7550 this=self._parse_field(any_token=True), 7551 location=self._match_text_seq("LOCATION", advance=False) 7552 and self._parse_property(), 7553 ) 7554 7555 return None 7556 7557 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7558 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7559 or self._match_text_seq("COLUMNS") 7560 ): 7561 schema = self._parse_schema() 7562 7563 return ( 7564 ensure_list(schema) 7565 if schema 7566 else self._parse_csv(self._parse_column_def_with_exists) 7567 ) 7568 7569 return self._parse_csv(_parse_add_alteration) 7570 7571 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7572 if self._match_texts(self.ALTER_ALTER_PARSERS): 7573 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7574 7575 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7576 # keyword after ALTER we default to parsing this statement 7577 self._match(TokenType.COLUMN) 7578 column = self._parse_field(any_token=True) 7579 7580 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7581 return self.expression(exp.AlterColumn, this=column, drop=True) 7582 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7583 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7584 if self._match(TokenType.COMMENT): 7585 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7586 if self._match_text_seq("DROP", "NOT", "NULL"): 7587 return self.expression( 7588 exp.AlterColumn, 7589 this=column, 7590 drop=True, 7591 allow_null=True, 7592 ) 7593 if self._match_text_seq("SET", "NOT", "NULL"): 7594 return self.expression( 7595 exp.AlterColumn, 7596 this=column, 7597 allow_null=False, 7598 ) 7599 7600 if self._match_text_seq("SET", "VISIBLE"): 7601 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7602 if self._match_text_seq("SET", "INVISIBLE"): 7603 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7604 7605 self._match_text_seq("SET", "DATA") 7606 self._match_text_seq("TYPE") 7607 return self.expression( 7608 exp.AlterColumn, 7609 this=column, 7610 dtype=self._parse_types(), 7611 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7612 using=self._match(TokenType.USING) and 
self._parse_assignment(), 7613 ) 7614 7615 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7616 if self._match_texts(("ALL", "EVEN", "AUTO")): 7617 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7618 7619 self._match_text_seq("KEY", "DISTKEY") 7620 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7621 7622 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7623 if compound: 7624 self._match_text_seq("SORTKEY") 7625 7626 if self._match(TokenType.L_PAREN, advance=False): 7627 return self.expression( 7628 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7629 ) 7630 7631 self._match_texts(("AUTO", "NONE")) 7632 return self.expression( 7633 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7634 ) 7635 7636 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7637 index = self._index - 1 7638 7639 partition_exists = self._parse_exists() 7640 if self._match(TokenType.PARTITION, advance=False): 7641 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7642 7643 self._retreat(index) 7644 return self._parse_csv(self._parse_drop_column) 7645 7646 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7647 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7648 exists = self._parse_exists() 7649 old_column = self._parse_column() 7650 to = self._match_text_seq("TO") 7651 new_column = self._parse_column() 7652 7653 if old_column is None or to is None or new_column is None: 7654 return None 7655 7656 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7657 7658 self._match_text_seq("TO") 7659 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7660 7661 def _parse_alter_table_set(self) -> exp.AlterSet: 7662 alter_set = self.expression(exp.AlterSet) 7663 7664 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7665 "TABLE", "PROPERTIES" 7666 ): 7667 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7668 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7669 alter_set.set("expressions", [self._parse_assignment()]) 7670 elif self._match_texts(("LOGGED", "UNLOGGED")): 7671 alter_set.set("option", exp.var(self._prev.text.upper())) 7672 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7673 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7674 elif self._match_text_seq("LOCATION"): 7675 alter_set.set("location", self._parse_field()) 7676 elif self._match_text_seq("ACCESS", "METHOD"): 7677 alter_set.set("access_method", self._parse_field()) 7678 elif self._match_text_seq("TABLESPACE"): 7679 alter_set.set("tablespace", self._parse_field()) 7680 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7681 alter_set.set("file_format", [self._parse_field()]) 7682 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7683 alter_set.set("file_format", self._parse_wrapped_options()) 7684 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7685 alter_set.set("copy_options", self._parse_wrapped_options()) 7686 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7687 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7688 else: 7689 if self._match_text_seq("SERDE"): 7690 alter_set.set("serde", self._parse_field()) 7691 7692 properties = 
self._parse_wrapped(self._parse_properties, optional=True) 7693 alter_set.set("expressions", [properties]) 7694 7695 return alter_set 7696 7697 def _parse_alter_session(self) -> exp.AlterSession: 7698 """Parse ALTER SESSION SET/UNSET statements.""" 7699 if self._match(TokenType.SET): 7700 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7701 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7702 7703 self._match_text_seq("UNSET") 7704 expressions = self._parse_csv( 7705 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 7706 ) 7707 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 7708 7709 def _parse_alter(self) -> exp.Alter | exp.Command: 7710 start = self._prev 7711 7712 alter_token = self._match_set(self.ALTERABLES) and self._prev 7713 if not alter_token: 7714 return self._parse_as_command(start) 7715 7716 exists = self._parse_exists() 7717 only = self._match_text_seq("ONLY") 7718 7719 if alter_token.token_type == TokenType.SESSION: 7720 this = None 7721 check = None 7722 cluster = None 7723 else: 7724 this = self._parse_table(schema=True) 7725 check = self._match_text_seq("WITH", "CHECK") 7726 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7727 7728 if self._next: 7729 self._advance() 7730 7731 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7732 if parser: 7733 actions = ensure_list(parser(self)) 7734 not_valid = self._match_text_seq("NOT", "VALID") 7735 options = self._parse_csv(self._parse_property) 7736 7737 if not self._curr and actions: 7738 return self.expression( 7739 exp.Alter, 7740 this=this, 7741 kind=alter_token.text.upper(), 7742 exists=exists, 7743 actions=actions, 7744 only=only, 7745 options=options, 7746 cluster=cluster, 7747 not_valid=not_valid, 7748 check=check, 7749 ) 7750 7751 return self._parse_as_command(start) 7752 7753 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7754 start = self._prev 7755 # https://duckdb.org/docs/sql/statements/analyze 7756 if not self._curr: 7757 return self.expression(exp.Analyze) 7758 7759 options = [] 7760 while self._match_texts(self.ANALYZE_STYLES): 7761 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7762 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7763 else: 7764 options.append(self._prev.text.upper()) 7765 7766 this: t.Optional[exp.Expression] = None 7767 inner_expression: t.Optional[exp.Expression] = None 7768 7769 kind = self._curr and self._curr.text.upper() 7770 7771 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7772 this = self._parse_table_parts() 7773 elif self._match_text_seq("TABLES"): 7774 if self._match_set((TokenType.FROM, TokenType.IN)): 7775 kind = f"{kind} {self._prev.text.upper()}" 7776 this = self._parse_table(schema=True, is_db_reference=True) 7777 elif self._match_text_seq("DATABASE"): 7778 this = self._parse_table(schema=True, is_db_reference=True) 7779 elif self._match_text_seq("CLUSTER"): 7780 this = self._parse_table() 7781 # Try matching inner expr keywords before fallback to parse table. 
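        # e.g. Oracle-style ANALYZE ... VALIDATE STRUCTURE or ANALYZE ... LIST CHAINED ROWS
        # (illustrative) dispatch to the _parse_analyze_* helpers below; the same
        # ANALYZE_EXPRESSION_PARSERS lookup is repeated after the optional partition clause.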
7782 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7783 kind = None 7784 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7785 else: 7786 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7787 kind = None 7788 this = self._parse_table_parts() 7789 7790 partition = self._try_parse(self._parse_partition) 7791 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7792 return self._parse_as_command(start) 7793 7794 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7795 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7796 "WITH", "ASYNC", "MODE" 7797 ): 7798 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7799 else: 7800 mode = None 7801 7802 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7803 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7804 7805 properties = self._parse_properties() 7806 return self.expression( 7807 exp.Analyze, 7808 kind=kind, 7809 this=this, 7810 mode=mode, 7811 partition=partition, 7812 properties=properties, 7813 expression=inner_expression, 7814 options=options, 7815 ) 7816 7817 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7818 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7819 this = None 7820 kind = self._prev.text.upper() 7821 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7822 expressions = [] 7823 7824 if not self._match_text_seq("STATISTICS"): 7825 self.raise_error("Expecting token STATISTICS") 7826 7827 if self._match_text_seq("NOSCAN"): 7828 this = "NOSCAN" 7829 elif self._match(TokenType.FOR): 7830 if self._match_text_seq("ALL", "COLUMNS"): 7831 this = "FOR ALL COLUMNS" 7832 if self._match_texts("COLUMNS"): 7833 this = "FOR COLUMNS" 7834 expressions = self._parse_csv(self._parse_column_reference) 7835 elif self._match_text_seq("SAMPLE"): 7836 sample = self._parse_number() 7837 expressions = [ 7838 self.expression( 7839 exp.AnalyzeSample, 7840 sample=sample, 7841 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7842 ) 7843 ] 7844 7845 return self.expression( 7846 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7847 ) 7848 7849 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7850 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7851 kind = None 7852 this = None 7853 expression: t.Optional[exp.Expression] = None 7854 if self._match_text_seq("REF", "UPDATE"): 7855 kind = "REF" 7856 this = "UPDATE" 7857 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7858 this = "UPDATE SET DANGLING TO NULL" 7859 elif self._match_text_seq("STRUCTURE"): 7860 kind = "STRUCTURE" 7861 if self._match_text_seq("CASCADE", "FAST"): 7862 this = "CASCADE FAST" 7863 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7864 ("ONLINE", "OFFLINE") 7865 ): 7866 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7867 expression = self._parse_into() 7868 7869 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7870 7871 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7872 this = self._prev.text.upper() 7873 if self._match_text_seq("COLUMNS"): 7874 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7875 return None 7876 7877 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7878 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7879 if self._match_text_seq("STATISTICS"): 7880 return self.expression(exp.AnalyzeDelete, kind=kind) 7881 return None 7882 7883 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7884 if self._match_text_seq("CHAINED", "ROWS"): 7885 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7886 return None 7887 7888 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7889 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7890 this = self._prev.text.upper() 7891 expression: t.Optional[exp.Expression] = None 7892 expressions = [] 7893 update_options = None 7894 7895 if self._match_text_seq("HISTOGRAM", "ON"): 7896 expressions = self._parse_csv(self._parse_column_reference) 7897 with_expressions = [] 7898 while self._match(TokenType.WITH): 7899 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7900 if self._match_texts(("SYNC", "ASYNC")): 7901 if self._match_text_seq("MODE", advance=False): 7902 with_expressions.append(f"{self._prev.text.upper()} MODE") 7903 self._advance() 7904 else: 7905 buckets = self._parse_number() 7906 if self._match_text_seq("BUCKETS"): 7907 with_expressions.append(f"{buckets} BUCKETS") 7908 if with_expressions: 7909 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7910 7911 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7912 TokenType.UPDATE, advance=False 7913 ): 7914 update_options = self._prev.text.upper() 7915 self._advance() 7916 elif self._match_text_seq("USING", "DATA"): 7917 expression = self.expression(exp.UsingData, this=self._parse_string()) 7918 7919 return self.expression( 7920 exp.AnalyzeHistogram, 7921 this=this, 7922 expressions=expressions, 7923 expression=expression, 7924 update_options=update_options, 7925 ) 7926 7927 def _parse_merge(self) -> exp.Merge: 7928 self._match(TokenType.INTO) 7929 target = self._parse_table() 7930 7931 if target and self._match(TokenType.ALIAS, advance=False): 7932 target.set("alias", self._parse_table_alias()) 7933 7934 self._match(TokenType.USING) 7935 using = self._parse_table() 7936 7937 self._match(TokenType.ON) 7938 on = self._parse_assignment() 7939 7940 return self.expression( 7941 exp.Merge, 7942 this=target, 7943 using=using, 7944 on=on, 7945 whens=self._parse_when_matched(), 7946 returning=self._parse_returning(), 7947 ) 7948 7949 def _parse_when_matched(self) -> exp.Whens: 7950 whens = [] 7951 7952 while self._match(TokenType.WHEN): 7953 matched = not self._match(TokenType.NOT) 7954 self._match_text_seq("MATCHED") 7955 source = ( 7956 False 7957 if self._match_text_seq("BY", "TARGET") 7958 else self._match_text_seq("BY", "SOURCE") 7959 ) 7960 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7961 7962 self._match(TokenType.THEN) 7963 7964 if self._match(TokenType.INSERT): 7965 this = self._parse_star() 7966 if this: 7967 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7968 else: 7969 then = self.expression( 7970 exp.Insert, 7971 this=exp.var("ROW") 7972 if self._match_text_seq("ROW") 7973 else self._parse_value(values=False), 7974 expression=self._match_text_seq("VALUES") and self._parse_value(), 7975 ) 7976 elif self._match(TokenType.UPDATE): 7977 expressions = self._parse_star() 7978 if expressions: 7979 then = self.expression(exp.Update, expressions=expressions) 7980 else: 7981 then = self.expression( 7982 exp.Update, 7983 
expressions=self._match(TokenType.SET) 7984 and self._parse_csv(self._parse_equality), 7985 ) 7986 elif self._match(TokenType.DELETE): 7987 then = self.expression(exp.Var, this=self._prev.text) 7988 else: 7989 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7990 7991 whens.append( 7992 self.expression( 7993 exp.When, 7994 matched=matched, 7995 source=source, 7996 condition=condition, 7997 then=then, 7998 ) 7999 ) 8000 return self.expression(exp.Whens, expressions=whens) 8001 8002 def _parse_show(self) -> t.Optional[exp.Expression]: 8003 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 8004 if parser: 8005 return parser(self) 8006 return self._parse_as_command(self._prev) 8007 8008 def _parse_set_item_assignment( 8009 self, kind: t.Optional[str] = None 8010 ) -> t.Optional[exp.Expression]: 8011 index = self._index 8012 8013 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 8014 return self._parse_set_transaction(global_=kind == "GLOBAL") 8015 8016 left = self._parse_primary() or self._parse_column() 8017 assignment_delimiter = self._match_texts(("=", "TO")) 8018 8019 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 8020 self._retreat(index) 8021 return None 8022 8023 right = self._parse_statement() or self._parse_id_var() 8024 if isinstance(right, (exp.Column, exp.Identifier)): 8025 right = exp.var(right.name) 8026 8027 this = self.expression(exp.EQ, this=left, expression=right) 8028 return self.expression(exp.SetItem, this=this, kind=kind) 8029 8030 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 8031 self._match_text_seq("TRANSACTION") 8032 characteristics = self._parse_csv( 8033 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 8034 ) 8035 return self.expression( 8036 exp.SetItem, 8037 expressions=characteristics, 8038 kind="TRANSACTION", 8039 **{"global": global_}, # type: ignore 8040 ) 8041 8042 def _parse_set_item(self) -> t.Optional[exp.Expression]: 8043 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 8044 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 8045 8046 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 8047 index = self._index 8048 set_ = self.expression( 8049 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 8050 ) 8051 8052 if self._curr: 8053 self._retreat(index) 8054 return self._parse_as_command(self._prev) 8055 8056 return set_ 8057 8058 def _parse_var_from_options( 8059 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 8060 ) -> t.Optional[exp.Var]: 8061 start = self._curr 8062 if not start: 8063 return None 8064 8065 option = start.text.upper() 8066 continuations = options.get(option) 8067 8068 index = self._index 8069 self._advance() 8070 for keywords in continuations or []: 8071 if isinstance(keywords, str): 8072 keywords = (keywords,) 8073 8074 if self._match_text_seq(*keywords): 8075 option = f"{option} {' '.join(keywords)}" 8076 break 8077 else: 8078 if continuations or continuations is None: 8079 if raise_unmatched: 8080 self.raise_error(f"Unknown option {option}") 8081 8082 self._retreat(index) 8083 return None 8084 8085 return exp.var(option) 8086 8087 def _parse_as_command(self, start: Token) -> exp.Command: 8088 while self._curr: 8089 self._advance() 8090 text = self._find_sql(start, self._prev) 8091 size = len(start.text) 8092 self._warn_unsupported() 8093 return exp.Command(this=text[:size], 
expression=text[size:]) 8094 8095 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8096 settings = [] 8097 8098 self._match_l_paren() 8099 kind = self._parse_id_var() 8100 8101 if self._match(TokenType.L_PAREN): 8102 while True: 8103 key = self._parse_id_var() 8104 value = self._parse_primary() 8105 if not key and value is None: 8106 break 8107 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8108 self._match(TokenType.R_PAREN) 8109 8110 self._match_r_paren() 8111 8112 return self.expression( 8113 exp.DictProperty, 8114 this=this, 8115 kind=kind.this if kind else None, 8116 settings=settings, 8117 ) 8118 8119 def _parse_dict_range(self, this: str) -> exp.DictRange: 8120 self._match_l_paren() 8121 has_min = self._match_text_seq("MIN") 8122 if has_min: 8123 min = self._parse_var() or self._parse_primary() 8124 self._match_text_seq("MAX") 8125 max = self._parse_var() or self._parse_primary() 8126 else: 8127 max = self._parse_var() or self._parse_primary() 8128 min = exp.Literal.number(0) 8129 self._match_r_paren() 8130 return self.expression(exp.DictRange, this=this, min=min, max=max) 8131 8132 def _parse_comprehension( 8133 self, this: t.Optional[exp.Expression] 8134 ) -> t.Optional[exp.Comprehension]: 8135 index = self._index 8136 expression = self._parse_column() 8137 if not self._match(TokenType.IN): 8138 self._retreat(index - 1) 8139 return None 8140 iterator = self._parse_column() 8141 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8142 return self.expression( 8143 exp.Comprehension, 8144 this=this, 8145 expression=expression, 8146 iterator=iterator, 8147 condition=condition, 8148 ) 8149 8150 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8151 if self._match(TokenType.HEREDOC_STRING): 8152 return self.expression(exp.Heredoc, this=self._prev.text) 8153 8154 if not self._match_text_seq("$"): 8155 return None 8156 8157 tags = ["$"] 8158 tag_text = None 8159 8160 if self._is_connected(): 8161 self._advance() 8162 tags.append(self._prev.text.upper()) 8163 else: 8164 self.raise_error("No closing $ found") 8165 8166 if tags[-1] != "$": 8167 if self._is_connected() and self._match_text_seq("$"): 8168 tag_text = tags[-1] 8169 tags.append("$") 8170 else: 8171 self.raise_error("No closing $ found") 8172 8173 heredoc_start = self._curr 8174 8175 while self._curr: 8176 if self._match_text_seq(*tags, advance=False): 8177 this = self._find_sql(heredoc_start, self._prev) 8178 self._advance(len(tags)) 8179 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8180 8181 self._advance() 8182 8183 self.raise_error(f"No closing {''.join(tags)} found") 8184 return None 8185 8186 def _find_parser( 8187 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8188 ) -> t.Optional[t.Callable]: 8189 if not self._curr: 8190 return None 8191 8192 index = self._index 8193 this = [] 8194 while True: 8195 # The current token might be multiple words 8196 curr = self._curr.text.upper() 8197 key = curr.split(" ") 8198 this.append(curr) 8199 8200 self._advance() 8201 result, trie = in_trie(trie, key) 8202 if result == TrieResult.FAILED: 8203 break 8204 8205 if result == TrieResult.EXISTS: 8206 subparser = parsers[" ".join(this)] 8207 return subparser 8208 8209 self._retreat(index) 8210 return None 8211 8212 def _match(self, token_type, advance=True, expression=None): 8213 if not self._curr: 8214 return None 8215 8216 if self._curr.token_type == token_type: 8217 if advance: 8218 self._advance() 8219 self._add_comments(expression) 8220 return 
True 8221 8222 return None 8223 8224 def _match_set(self, types, advance=True): 8225 if not self._curr: 8226 return None 8227 8228 if self._curr.token_type in types: 8229 if advance: 8230 self._advance() 8231 return True 8232 8233 return None 8234 8235 def _match_pair(self, token_type_a, token_type_b, advance=True): 8236 if not self._curr or not self._next: 8237 return None 8238 8239 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8240 if advance: 8241 self._advance(2) 8242 return True 8243 8244 return None 8245 8246 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8247 if not self._match(TokenType.L_PAREN, expression=expression): 8248 self.raise_error("Expecting (") 8249 8250 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8251 if not self._match(TokenType.R_PAREN, expression=expression): 8252 self.raise_error("Expecting )") 8253 8254 def _match_texts(self, texts, advance=True): 8255 if ( 8256 self._curr 8257 and self._curr.token_type != TokenType.STRING 8258 and self._curr.text.upper() in texts 8259 ): 8260 if advance: 8261 self._advance() 8262 return True 8263 return None 8264 8265 def _match_text_seq(self, *texts, advance=True): 8266 index = self._index 8267 for text in texts: 8268 if ( 8269 self._curr 8270 and self._curr.token_type != TokenType.STRING 8271 and self._curr.text.upper() == text 8272 ): 8273 self._advance() 8274 else: 8275 self._retreat(index) 8276 return None 8277 8278 if not advance: 8279 self._retreat(index) 8280 8281 return True 8282 8283 def _replace_lambda( 8284 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8285 ) -> t.Optional[exp.Expression]: 8286 if not node: 8287 return node 8288 8289 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8290 8291 for column in node.find_all(exp.Column): 8292 typ = lambda_types.get(column.parts[0].name) 8293 if typ is not None: 8294 dot_or_id = column.to_dot() if column.table else column.this 8295 8296 if typ: 8297 dot_or_id = self.expression( 8298 exp.Cast, 8299 this=dot_or_id, 8300 to=typ, 8301 ) 8302 8303 parent = column.parent 8304 8305 while isinstance(parent, exp.Dot): 8306 if not isinstance(parent.parent, exp.Dot): 8307 parent.replace(dot_or_id) 8308 break 8309 parent = parent.parent 8310 else: 8311 if column is node: 8312 node = dot_or_id 8313 else: 8314 column.replace(dot_or_id) 8315 return node 8316 8317 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8318 start = self._prev 8319 8320 # Not to be confused with TRUNCATE(number, decimals) function call 8321 if self._match(TokenType.L_PAREN): 8322 self._retreat(self._index - 2) 8323 return self._parse_function() 8324 8325 # Clickhouse supports TRUNCATE DATABASE as well 8326 is_database = self._match(TokenType.DATABASE) 8327 8328 self._match(TokenType.TABLE) 8329 8330 exists = self._parse_exists(not_=False) 8331 8332 expressions = self._parse_csv( 8333 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8334 ) 8335 8336 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8337 8338 if self._match_text_seq("RESTART", "IDENTITY"): 8339 identity = "RESTART" 8340 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8341 identity = "CONTINUE" 8342 else: 8343 identity = None 8344 8345 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8346 option = self._prev.text 8347 else: 8348 option = None 8349 8350 partition = self._parse_partition() 
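        # Illustrative: TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE reaches this point with
        # identity="RESTART" and option="CASCADE"; anything left unconsumed falls back to a
        # plain exp.Command below.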
8351 8352 # Fallback case 8353 if self._curr: 8354 return self._parse_as_command(start) 8355 8356 return self.expression( 8357 exp.TruncateTable, 8358 expressions=expressions, 8359 is_database=is_database, 8360 exists=exists, 8361 cluster=cluster, 8362 identity=identity, 8363 option=option, 8364 partition=partition, 8365 ) 8366 8367 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8368 this = self._parse_ordered(self._parse_opclass) 8369 8370 if not self._match(TokenType.WITH): 8371 return this 8372 8373 op = self._parse_var(any_token=True) 8374 8375 return self.expression(exp.WithOperator, this=this, op=op) 8376 8377 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8378 self._match(TokenType.EQ) 8379 self._match(TokenType.L_PAREN) 8380 8381 opts: t.List[t.Optional[exp.Expression]] = [] 8382 option: exp.Expression | None 8383 while self._curr and not self._match(TokenType.R_PAREN): 8384 if self._match_text_seq("FORMAT_NAME", "="): 8385 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8386 option = self._parse_format_name() 8387 else: 8388 option = self._parse_property() 8389 8390 if option is None: 8391 self.raise_error("Unable to parse option") 8392 break 8393 8394 opts.append(option) 8395 8396 return opts 8397 8398 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8399 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8400 8401 options = [] 8402 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8403 option = self._parse_var(any_token=True) 8404 prev = self._prev.text.upper() 8405 8406 # Different dialects might separate options and values by white space, "=" and "AS" 8407 self._match(TokenType.EQ) 8408 self._match(TokenType.ALIAS) 8409 8410 param = self.expression(exp.CopyParameter, this=option) 8411 8412 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8413 TokenType.L_PAREN, advance=False 8414 ): 8415 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8416 param.set("expressions", self._parse_wrapped_options()) 8417 elif prev == "FILE_FORMAT": 8418 # T-SQL's external file format case 8419 param.set("expression", self._parse_field()) 8420 else: 8421 param.set("expression", self._parse_unquoted_field()) 8422 8423 options.append(param) 8424 self._match(sep) 8425 8426 return options 8427 8428 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8429 expr = self.expression(exp.Credentials) 8430 8431 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8432 expr.set("storage", self._parse_field()) 8433 if self._match_text_seq("CREDENTIALS"): 8434 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8435 creds = ( 8436 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8437 ) 8438 expr.set("credentials", creds) 8439 if self._match_text_seq("ENCRYPTION"): 8440 expr.set("encryption", self._parse_wrapped_options()) 8441 if self._match_text_seq("IAM_ROLE"): 8442 expr.set("iam_role", self._parse_field()) 8443 if self._match_text_seq("REGION"): 8444 expr.set("region", self._parse_field()) 8445 8446 return expr 8447 8448 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8449 return self._parse_field() 8450 8451 def _parse_copy(self) -> exp.Copy | exp.Command: 8452 start = self._prev 8453 8454 self._match(TokenType.INTO) 8455 8456 this = ( 8457 self._parse_select(nested=True, parse_subquery_alias=False) 8458 if self._match(TokenType.L_PAREN, advance=False) 8459 else self._parse_table(schema=True) 
8460 ) 8461 8462 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8463 8464 files = self._parse_csv(self._parse_file_location) 8465 if self._match(TokenType.EQ, advance=False): 8466 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 8467 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 8468 # list via `_parse_wrapped(..)` below. 8469 self._advance(-1) 8470 files = [] 8471 8472 credentials = self._parse_credentials() 8473 8474 self._match_text_seq("WITH") 8475 8476 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8477 8478 # Fallback case 8479 if self._curr: 8480 return self._parse_as_command(start) 8481 8482 return self.expression( 8483 exp.Copy, 8484 this=this, 8485 kind=kind, 8486 credentials=credentials, 8487 files=files, 8488 params=params, 8489 ) 8490 8491 def _parse_normalize(self) -> exp.Normalize: 8492 return self.expression( 8493 exp.Normalize, 8494 this=self._parse_bitwise(), 8495 form=self._match(TokenType.COMMA) and self._parse_var(), 8496 ) 8497 8498 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8499 args = self._parse_csv(lambda: self._parse_lambda()) 8500 8501 this = seq_get(args, 0) 8502 decimals = seq_get(args, 1) 8503 8504 return expr_type( 8505 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8506 ) 8507 8508 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8509 star_token = self._prev 8510 8511 if self._match_text_seq("COLUMNS", "(", advance=False): 8512 this = self._parse_function() 8513 if isinstance(this, exp.Columns): 8514 this.set("unpack", True) 8515 return this 8516 8517 return self.expression( 8518 exp.Star, 8519 **{ # type: ignore 8520 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8521 "replace": self._parse_star_op("REPLACE"), 8522 "rename": self._parse_star_op("RENAME"), 8523 }, 8524 ).update_positions(star_token) 8525 8526 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8527 privilege_parts = [] 8528 8529 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8530 # (end of privilege list) or L_PAREN (start of column list) are met 8531 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8532 privilege_parts.append(self._curr.text.upper()) 8533 self._advance() 8534 8535 this = exp.var(" ".join(privilege_parts)) 8536 expressions = ( 8537 self._parse_wrapped_csv(self._parse_column) 8538 if self._match(TokenType.L_PAREN, advance=False) 8539 else None 8540 ) 8541 8542 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8543 8544 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8545 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8546 principal = self._parse_id_var() 8547 8548 if not principal: 8549 return None 8550 8551 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8552 8553 def _parse_grant_revoke_common( 8554 self, 8555 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8556 privileges = self._parse_csv(self._parse_grant_privilege) 8557 8558 self._match(TokenType.ON) 8559 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8560 8561 # Attempt to parse the securable e.g. 
MySQL allows names 8562 # such as "foo.*", "*.*" which are not easily parseable yet 8563 securable = self._try_parse(self._parse_table_parts) 8564 8565 return privileges, kind, securable 8566 8567 def _parse_grant(self) -> exp.Grant | exp.Command: 8568 start = self._prev 8569 8570 privileges, kind, securable = self._parse_grant_revoke_common() 8571 8572 if not securable or not self._match_text_seq("TO"): 8573 return self._parse_as_command(start) 8574 8575 principals = self._parse_csv(self._parse_grant_principal) 8576 8577 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8578 8579 if self._curr: 8580 return self._parse_as_command(start) 8581 8582 return self.expression( 8583 exp.Grant, 8584 privileges=privileges, 8585 kind=kind, 8586 securable=securable, 8587 principals=principals, 8588 grant_option=grant_option, 8589 ) 8590 8591 def _parse_revoke(self) -> exp.Revoke | exp.Command: 8592 start = self._prev 8593 8594 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 8595 8596 privileges, kind, securable = self._parse_grant_revoke_common() 8597 8598 if not securable or not self._match_text_seq("FROM"): 8599 return self._parse_as_command(start) 8600 8601 principals = self._parse_csv(self._parse_grant_principal) 8602 8603 cascade = None 8604 if self._match_texts(("CASCADE", "RESTRICT")): 8605 cascade = self._prev.text.upper() 8606 8607 if self._curr: 8608 return self._parse_as_command(start) 8609 8610 return self.expression( 8611 exp.Revoke, 8612 privileges=privileges, 8613 kind=kind, 8614 securable=securable, 8615 principals=principals, 8616 grant_option=grant_option, 8617 cascade=cascade, 8618 ) 8619 8620 def _parse_overlay(self) -> exp.Overlay: 8621 return self.expression( 8622 exp.Overlay, 8623 **{ # type: ignore 8624 "this": self._parse_bitwise(), 8625 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8626 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8627 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8628 }, 8629 ) 8630 8631 def _parse_format_name(self) -> exp.Property: 8632 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8633 # for FILE_FORMAT = <format_name> 8634 return self.expression( 8635 exp.Property, 8636 this=exp.var("FORMAT_NAME"), 8637 value=self._parse_string() or self._parse_table_parts(), 8638 ) 8639 8640 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8641 args: t.List[exp.Expression] = [] 8642 8643 if self._match(TokenType.DISTINCT): 8644 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8645 self._match(TokenType.COMMA) 8646 8647 args.extend(self._parse_csv(self._parse_assignment)) 8648 8649 return self.expression( 8650 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8651 ) 8652 8653 def _identifier_expression( 8654 self, token: t.Optional[Token] = None, **kwargs: t.Any 8655 ) -> exp.Identifier: 8656 token = token or self._prev 8657 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8658 expression.update_positions(token) 8659 return expression 8660 8661 def _build_pipe_cte( 8662 self, 8663 query: exp.Query, 8664 expressions: t.List[exp.Expression], 8665 alias_cte: t.Optional[exp.TableAlias] = None, 8666 ) -> exp.Select: 8667 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8668 if alias_cte: 8669 new_cte = alias_cte 8670 else: 8671 self._pipe_cte_counter += 1 8672 new_cte = f"__tmp{self._pipe_cte_counter}" 8673 8674 with_ = 
query.args.get("with") 8675 ctes = with_.pop() if with_ else None 8676 8677 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8678 if ctes: 8679 new_select.set("with", ctes) 8680 8681 return new_select.with_(new_cte, as_=query, copy=False) 8682 8683 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8684 select = self._parse_select(consume_pipe=False) 8685 if not select: 8686 return query 8687 8688 return self._build_pipe_cte( 8689 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8690 ) 8691 8692 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8693 limit = self._parse_limit() 8694 offset = self._parse_offset() 8695 if limit: 8696 curr_limit = query.args.get("limit", limit) 8697 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8698 query.limit(limit, copy=False) 8699 if offset: 8700 curr_offset = query.args.get("offset") 8701 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8702 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8703 8704 return query 8705 8706 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8707 this = self._parse_assignment() 8708 if self._match_text_seq("GROUP", "AND", advance=False): 8709 return this 8710 8711 this = self._parse_alias(this) 8712 8713 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8714 return self._parse_ordered(lambda: this) 8715 8716 return this 8717 8718 def _parse_pipe_syntax_aggregate_group_order_by( 8719 self, query: exp.Select, group_by_exists: bool = True 8720 ) -> exp.Select: 8721 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8722 aggregates_or_groups, orders = [], [] 8723 for element in expr: 8724 if isinstance(element, exp.Ordered): 8725 this = element.this 8726 if isinstance(this, exp.Alias): 8727 element.set("this", this.args["alias"]) 8728 orders.append(element) 8729 else: 8730 this = element 8731 aggregates_or_groups.append(this) 8732 8733 if group_by_exists: 8734 query.select(*aggregates_or_groups, copy=False).group_by( 8735 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8736 copy=False, 8737 ) 8738 else: 8739 query.select(*aggregates_or_groups, append=False, copy=False) 8740 8741 if orders: 8742 return query.order_by(*orders, append=False, copy=False) 8743 8744 return query 8745 8746 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8747 self._match_text_seq("AGGREGATE") 8748 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8749 8750 if self._match(TokenType.GROUP_BY) or ( 8751 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8752 ): 8753 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8754 8755 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8756 8757 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8758 first_setop = self.parse_set_operation(this=query) 8759 if not first_setop: 8760 return None 8761 8762 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8763 expr = self._parse_paren() 8764 return expr.assert_is(exp.Subquery).unnest() if expr else None 8765 8766 first_setop.this.pop() 8767 8768 setops = [ 8769 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8770 *self._parse_csv(_parse_and_unwrap_query), 8771 ] 8772 8773 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8774 
with_ = query.args.get("with") 8775 ctes = with_.pop() if with_ else None 8776 8777 if isinstance(first_setop, exp.Union): 8778 query = query.union(*setops, copy=False, **first_setop.args) 8779 elif isinstance(first_setop, exp.Except): 8780 query = query.except_(*setops, copy=False, **first_setop.args) 8781 else: 8782 query = query.intersect(*setops, copy=False, **first_setop.args) 8783 8784 query.set("with", ctes) 8785 8786 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8787 8788 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8789 join = self._parse_join() 8790 if not join: 8791 return None 8792 8793 if isinstance(query, exp.Select): 8794 return query.join(join, copy=False) 8795 8796 return query 8797 8798 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8799 pivots = self._parse_pivots() 8800 if not pivots: 8801 return query 8802 8803 from_ = query.args.get("from") 8804 if from_: 8805 from_.this.set("pivots", pivots) 8806 8807 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8808 8809 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8810 self._match_text_seq("EXTEND") 8811 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8812 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8813 8814 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8815 sample = self._parse_table_sample() 8816 8817 with_ = query.args.get("with") 8818 if with_: 8819 with_.expressions[-1].this.set("sample", sample) 8820 else: 8821 query.set("sample", sample) 8822 8823 return query 8824 8825 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8826 if isinstance(query, exp.Subquery): 8827 query = exp.select("*").from_(query, copy=False) 8828 8829 if not query.args.get("from"): 8830 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8831 8832 while self._match(TokenType.PIPE_GT): 8833 start = self._curr 8834 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8835 if not parser: 8836 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8837 # keywords, making it tricky to disambiguate them without lookahead. The approach 8838 # here is to try and parse a set operation and if that fails, then try to parse a 8839 # join operator. If that fails as well, then the operator is not supported. 
8840 parsed_query = self._parse_pipe_syntax_set_operator(query) 8841 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8842 if not parsed_query: 8843 self._retreat(start) 8844 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8845 break 8846 query = parsed_query 8847 else: 8848 query = parser(self, query) 8849 8850 return query 8851 8852 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8853 vars = self._parse_csv(self._parse_id_var) 8854 if not vars: 8855 return None 8856 8857 return self.expression( 8858 exp.DeclareItem, 8859 this=vars, 8860 kind=self._parse_types(), 8861 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8862 ) 8863 8864 def _parse_declare(self) -> exp.Declare | exp.Command: 8865 start = self._prev 8866 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8867 8868 if not expressions or self._curr: 8869 return self._parse_as_command(start) 8870 8871 return self.expression(exp.Declare, expressions=expressions) 8872 8873 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8874 exp_class = exp.Cast if strict else exp.TryCast 8875 8876 if exp_class == exp.TryCast: 8877 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8878 8879 return self.expression(exp_class, **kwargs) 8880 8881 def _parse_json_value(self) -> exp.JSONValue: 8882 this = self._parse_bitwise() 8883 self._match(TokenType.COMMA) 8884 path = self._parse_bitwise() 8885 8886 returning = self._match(TokenType.RETURNING) and self._parse_type() 8887 8888 return self.expression( 8889 exp.JSONValue, 8890 this=this, 8891 path=self.dialect.to_json_path(path), 8892 returning=returning, 8893 on_condition=self._parse_on_condition(), 8894 ) 8895 8896 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8897 def concat_exprs( 8898 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8899 ) -> exp.Expression: 8900 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 8901 concat_exprs = [ 8902 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8903 ] 8904 node.set("expressions", concat_exprs) 8905 return node 8906 if len(exprs) == 1: 8907 return exprs[0] 8908 return self.expression(exp.Concat, expressions=args, safe=True) 8909 8910 args = self._parse_csv(self._parse_lambda) 8911 8912 if args: 8913 order = args[-1] if isinstance(args[-1], exp.Order) else None 8914 8915 if order: 8916 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8917 # remove 'expr' from exp.Order and add it back to args 8918 args[-1] = order.this 8919 order.set("this", concat_exprs(order.this, args)) 8920 8921 this = order or concat_exprs(args[0], args) 8922 else: 8923 this = None 8924 8925 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8926 8927 return self.expression(exp.GroupConcat, this=this, separator=separator)
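# End-to-end usage sketch (not part of the module source), assuming a standard sqlglot
# install: the MySQL dialect routes GROUP_CONCAT(...) through a group-concat parser like
# the one above, capturing DISTINCT, ORDER BY and SEPARATOR.
from sqlglot import exp, parse_one

ast = parse_one(
    "SELECT GROUP_CONCAT(DISTINCT name ORDER BY name SEPARATOR ', ') FROM users",
    read="mysql",
)
group_concat = ast.find(exp.GroupConcat)
assert group_concat is not None
print(group_concat.args.get("separator"))  # the parsed separator literal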
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
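# Usage sketch (not part of the source): the flat argument list of a VAR_MAP-style call
# is split into alternating keys and values.
from sqlglot import exp
from sqlglot.parser import build_var_map

node = build_var_map(
    [exp.Literal.string("a"), exp.Literal.number(1), exp.Literal.string("b"), exp.Literal.number(2)]
)
assert isinstance(node, exp.VarMap)
print(node.args["keys"].expressions)    # the key Array: 'a', 'b'
print(node.args["values"].expressions)  # the value Array: 1, 2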
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range
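# Illustrative wiring sketch (mirrors entries in the RANGE_PARSERS table further down):
# the factory returns a closure that parses the right-hand operand with _parse_bitwise,
# optionally swaps the operands, and hands the node to _parse_escape for a trailing
# ESCAPE clause.
from sqlglot import exp
from sqlglot.parser import binary_range_parser

parse_glob = binary_range_parser(exp.Glob)
parse_contained_by = binary_range_parser(exp.ArrayContainsAll, reverse_args=True)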
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
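# Usage sketch (not part of the source; assumes the base dialect, where the base argument
# comes first): a two-argument LOG always builds an exp.Log node.
from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.parser import build_logarithm

node = build_logarithm([exp.Literal.number(2), exp.column("x")], Dialect())
assert isinstance(node, exp.Log)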
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
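# Usage sketch (not part of the source): JSON_EXTRACT is registered with this builder in
# the FUNCTIONS table below, so the string path argument comes back as a structured
# JSON path node on the default dialect.
from sqlglot import exp, parse_one

ast = parse_one("SELECT JSON_EXTRACT(payload, '$.user.id') FROM events")
extract = ast.find(exp.JSONExtract)
assert extract is not None and isinstance(extract.expression, exp.JSONPath)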
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
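# Usage sketch (not part of the source): wrapping binary operands keeps precedence intact
# when MOD(...) is later rendered with the % operator.
from sqlglot import exp
from sqlglot.parser import build_mod

node = build_mod([exp.column("a") + exp.Literal.number(1), exp.Literal.number(7)])
print(node.sql())  # expected: (a + 1) % 7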
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)
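# Usage sketch (assumption: a caller that supplies a default source timezone, as the
# optional parameter suggests some dialects do). With exactly two arguments the default
# source zone is filled in; otherwise the full argument list is used as-is.
from sqlglot import exp
from sqlglot.parser import build_convert_timezone

node = build_convert_timezone(
    [exp.Literal.string("America/New_York"), exp.column("created_at")],
    default_source_tz="UTC",
)
assert node.args["source_tz"].name == "UTC"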
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDate, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOGRAPHYPOINT, 385 TokenType.GEOMETRY, 386 TokenType.POINT, 387 TokenType.RING, 388 TokenType.LINESTRING, 389 TokenType.MULTILINESTRING, 390 TokenType.POLYGON, 391 TokenType.MULTIPOLYGON, 392 TokenType.HLLSKETCH, 393 TokenType.HSTORE, 394 TokenType.PSEUDO_TYPE, 395 TokenType.SUPER, 396 TokenType.SERIAL, 397 TokenType.SMALLSERIAL, 398 TokenType.BIGSERIAL, 399 TokenType.XML, 400 TokenType.YEAR, 401 TokenType.USERDEFINED, 402 TokenType.MONEY, 403 TokenType.SMALLMONEY, 404 TokenType.ROWVERSION, 405 TokenType.IMAGE, 406 TokenType.VARIANT, 407 
TokenType.VECTOR, 408 TokenType.VOID, 409 TokenType.OBJECT, 410 TokenType.OBJECT_IDENTIFIER, 411 TokenType.INET, 412 TokenType.IPADDRESS, 413 TokenType.IPPREFIX, 414 TokenType.IPV4, 415 TokenType.IPV6, 416 TokenType.UNKNOWN, 417 TokenType.NOTHING, 418 TokenType.NULL, 419 TokenType.NAME, 420 TokenType.TDIGEST, 421 TokenType.DYNAMIC, 422 *ENUM_TYPE_TOKENS, 423 *NESTED_TYPE_TOKENS, 424 *AGGREGATE_TYPE_TOKENS, 425 } 426 427 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 428 TokenType.BIGINT: TokenType.UBIGINT, 429 TokenType.INT: TokenType.UINT, 430 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 431 TokenType.SMALLINT: TokenType.USMALLINT, 432 TokenType.TINYINT: TokenType.UTINYINT, 433 TokenType.DECIMAL: TokenType.UDECIMAL, 434 TokenType.DOUBLE: TokenType.UDOUBLE, 435 } 436 437 SUBQUERY_PREDICATES = { 438 TokenType.ANY: exp.Any, 439 TokenType.ALL: exp.All, 440 TokenType.EXISTS: exp.Exists, 441 TokenType.SOME: exp.Any, 442 } 443 444 RESERVED_TOKENS = { 445 *Tokenizer.SINGLE_TOKENS.values(), 446 TokenType.SELECT, 447 } - {TokenType.IDENTIFIER} 448 449 DB_CREATABLES = { 450 TokenType.DATABASE, 451 TokenType.DICTIONARY, 452 TokenType.FILE_FORMAT, 453 TokenType.MODEL, 454 TokenType.NAMESPACE, 455 TokenType.SCHEMA, 456 TokenType.SEMANTIC_VIEW, 457 TokenType.SEQUENCE, 458 TokenType.SINK, 459 TokenType.SOURCE, 460 TokenType.STAGE, 461 TokenType.STORAGE_INTEGRATION, 462 TokenType.STREAMLIT, 463 TokenType.TABLE, 464 TokenType.TAG, 465 TokenType.VIEW, 466 TokenType.WAREHOUSE, 467 } 468 469 CREATABLES = { 470 TokenType.COLUMN, 471 TokenType.CONSTRAINT, 472 TokenType.FOREIGN_KEY, 473 TokenType.FUNCTION, 474 TokenType.INDEX, 475 TokenType.PROCEDURE, 476 *DB_CREATABLES, 477 } 478 479 ALTERABLES = { 480 TokenType.INDEX, 481 TokenType.TABLE, 482 TokenType.VIEW, 483 TokenType.SESSION, 484 } 485 486 # Tokens that can represent identifiers 487 ID_VAR_TOKENS = { 488 TokenType.ALL, 489 TokenType.ATTACH, 490 TokenType.VAR, 491 TokenType.ANTI, 492 TokenType.APPLY, 493 TokenType.ASC, 494 TokenType.ASOF, 495 TokenType.AUTO_INCREMENT, 496 TokenType.BEGIN, 497 TokenType.BPCHAR, 498 TokenType.CACHE, 499 TokenType.CASE, 500 TokenType.COLLATE, 501 TokenType.COMMAND, 502 TokenType.COMMENT, 503 TokenType.COMMIT, 504 TokenType.CONSTRAINT, 505 TokenType.COPY, 506 TokenType.CUBE, 507 TokenType.CURRENT_SCHEMA, 508 TokenType.DEFAULT, 509 TokenType.DELETE, 510 TokenType.DESC, 511 TokenType.DESCRIBE, 512 TokenType.DETACH, 513 TokenType.DICTIONARY, 514 TokenType.DIV, 515 TokenType.END, 516 TokenType.EXECUTE, 517 TokenType.EXPORT, 518 TokenType.ESCAPE, 519 TokenType.FALSE, 520 TokenType.FIRST, 521 TokenType.FILTER, 522 TokenType.FINAL, 523 TokenType.FORMAT, 524 TokenType.FULL, 525 TokenType.GET, 526 TokenType.IDENTIFIER, 527 TokenType.IS, 528 TokenType.ISNULL, 529 TokenType.INTERVAL, 530 TokenType.KEEP, 531 TokenType.KILL, 532 TokenType.LEFT, 533 TokenType.LIMIT, 534 TokenType.LOAD, 535 TokenType.LOCK, 536 TokenType.MERGE, 537 TokenType.NATURAL, 538 TokenType.NEXT, 539 TokenType.OFFSET, 540 TokenType.OPERATOR, 541 TokenType.ORDINALITY, 542 TokenType.OVERLAPS, 543 TokenType.OVERWRITE, 544 TokenType.PARTITION, 545 TokenType.PERCENT, 546 TokenType.PIVOT, 547 TokenType.PRAGMA, 548 TokenType.PUT, 549 TokenType.RANGE, 550 TokenType.RECURSIVE, 551 TokenType.REFERENCES, 552 TokenType.REFRESH, 553 TokenType.RENAME, 554 TokenType.REPLACE, 555 TokenType.RIGHT, 556 TokenType.ROLLUP, 557 TokenType.ROW, 558 TokenType.ROWS, 559 TokenType.SEMI, 560 TokenType.SET, 561 TokenType.SETTINGS, 562 TokenType.SHOW, 563 TokenType.TEMPORARY, 564 TokenType.TOP, 565 
TokenType.TRUE, 566 TokenType.TRUNCATE, 567 TokenType.UNIQUE, 568 TokenType.UNNEST, 569 TokenType.UNPIVOT, 570 TokenType.UPDATE, 571 TokenType.USE, 572 TokenType.VOLATILE, 573 TokenType.WINDOW, 574 *ALTERABLES, 575 *CREATABLES, 576 *SUBQUERY_PREDICATES, 577 *TYPE_TOKENS, 578 *NO_PAREN_FUNCTIONS, 579 } 580 ID_VAR_TOKENS.remove(TokenType.UNION) 581 582 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 583 TokenType.ANTI, 584 TokenType.ASOF, 585 TokenType.FULL, 586 TokenType.LEFT, 587 TokenType.LOCK, 588 TokenType.NATURAL, 589 TokenType.RIGHT, 590 TokenType.SEMI, 591 TokenType.WINDOW, 592 } 593 594 ALIAS_TOKENS = ID_VAR_TOKENS 595 596 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 597 598 ARRAY_CONSTRUCTORS = { 599 "ARRAY": exp.Array, 600 "LIST": exp.List, 601 } 602 603 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 604 605 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 606 607 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 608 609 FUNC_TOKENS = { 610 TokenType.COLLATE, 611 TokenType.COMMAND, 612 TokenType.CURRENT_DATE, 613 TokenType.CURRENT_DATETIME, 614 TokenType.CURRENT_SCHEMA, 615 TokenType.CURRENT_TIMESTAMP, 616 TokenType.CURRENT_TIME, 617 TokenType.CURRENT_USER, 618 TokenType.FILTER, 619 TokenType.FIRST, 620 TokenType.FORMAT, 621 TokenType.GET, 622 TokenType.GLOB, 623 TokenType.IDENTIFIER, 624 TokenType.INDEX, 625 TokenType.ISNULL, 626 TokenType.ILIKE, 627 TokenType.INSERT, 628 TokenType.LIKE, 629 TokenType.MERGE, 630 TokenType.NEXT, 631 TokenType.OFFSET, 632 TokenType.PRIMARY_KEY, 633 TokenType.RANGE, 634 TokenType.REPLACE, 635 TokenType.RLIKE, 636 TokenType.ROW, 637 TokenType.UNNEST, 638 TokenType.VAR, 639 TokenType.LEFT, 640 TokenType.RIGHT, 641 TokenType.SEQUENCE, 642 TokenType.DATE, 643 TokenType.DATETIME, 644 TokenType.TABLE, 645 TokenType.TIMESTAMP, 646 TokenType.TIMESTAMPTZ, 647 TokenType.TRUNCATE, 648 TokenType.UTC_DATE, 649 TokenType.UTC_TIME, 650 TokenType.UTC_TIMESTAMP, 651 TokenType.WINDOW, 652 TokenType.XOR, 653 *TYPE_TOKENS, 654 *SUBQUERY_PREDICATES, 655 } 656 657 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 658 TokenType.AND: exp.And, 659 } 660 661 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 662 TokenType.COLON_EQ: exp.PropertyEQ, 663 } 664 665 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 666 TokenType.OR: exp.Or, 667 } 668 669 EQUALITY = { 670 TokenType.EQ: exp.EQ, 671 TokenType.NEQ: exp.NEQ, 672 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 673 } 674 675 COMPARISON = { 676 TokenType.GT: exp.GT, 677 TokenType.GTE: exp.GTE, 678 TokenType.LT: exp.LT, 679 TokenType.LTE: exp.LTE, 680 } 681 682 BITWISE = { 683 TokenType.AMP: exp.BitwiseAnd, 684 TokenType.CARET: exp.BitwiseXor, 685 TokenType.PIPE: exp.BitwiseOr, 686 } 687 688 TERM = { 689 TokenType.DASH: exp.Sub, 690 TokenType.PLUS: exp.Add, 691 TokenType.MOD: exp.Mod, 692 TokenType.COLLATE: exp.Collate, 693 } 694 695 FACTOR = { 696 TokenType.DIV: exp.IntDiv, 697 TokenType.LR_ARROW: exp.Distance, 698 TokenType.SLASH: exp.Div, 699 TokenType.STAR: exp.Mul, 700 } 701 702 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 703 704 TIMES = { 705 TokenType.TIME, 706 TokenType.TIMETZ, 707 } 708 709 TIMESTAMPS = { 710 TokenType.TIMESTAMP, 711 TokenType.TIMESTAMPNTZ, 712 TokenType.TIMESTAMPTZ, 713 TokenType.TIMESTAMPLTZ, 714 *TIMES, 715 } 716 717 SET_OPERATIONS = { 718 TokenType.UNION, 719 TokenType.INTERSECT, 720 TokenType.EXCEPT, 721 } 722 723 JOIN_METHODS = { 724 TokenType.ASOF, 725 TokenType.NATURAL, 726 TokenType.POSITIONAL, 727 } 728 729 JOIN_SIDES = { 730 TokenType.LEFT, 731 
TokenType.RIGHT, 732 TokenType.FULL, 733 } 734 735 JOIN_KINDS = { 736 TokenType.ANTI, 737 TokenType.CROSS, 738 TokenType.INNER, 739 TokenType.OUTER, 740 TokenType.SEMI, 741 TokenType.STRAIGHT_JOIN, 742 } 743 744 JOIN_HINTS: t.Set[str] = set() 745 746 LAMBDAS = { 747 TokenType.ARROW: lambda self, expressions: self.expression( 748 exp.Lambda, 749 this=self._replace_lambda( 750 self._parse_assignment(), 751 expressions, 752 ), 753 expressions=expressions, 754 ), 755 TokenType.FARROW: lambda self, expressions: self.expression( 756 exp.Kwarg, 757 this=exp.var(expressions[0].name), 758 expression=self._parse_assignment(), 759 ), 760 } 761 762 COLUMN_OPERATORS = { 763 TokenType.DOT: None, 764 TokenType.DOTCOLON: lambda self, this, to: self.expression( 765 exp.JSONCast, 766 this=this, 767 to=to, 768 ), 769 TokenType.DCOLON: lambda self, this, to: self.build_cast( 770 strict=self.STRICT_CAST, this=this, to=to 771 ), 772 TokenType.ARROW: lambda self, this, path: self.expression( 773 exp.JSONExtract, 774 this=this, 775 expression=self.dialect.to_json_path(path), 776 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 777 ), 778 TokenType.DARROW: lambda self, this, path: self.expression( 779 exp.JSONExtractScalar, 780 this=this, 781 expression=self.dialect.to_json_path(path), 782 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 783 ), 784 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 785 exp.JSONBExtract, 786 this=this, 787 expression=path, 788 ), 789 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 790 exp.JSONBExtractScalar, 791 this=this, 792 expression=path, 793 ), 794 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 795 exp.JSONBContains, 796 this=this, 797 expression=key, 798 ), 799 } 800 801 CAST_COLUMN_OPERATORS = { 802 TokenType.DOTCOLON, 803 TokenType.DCOLON, 804 } 805 806 EXPRESSION_PARSERS = { 807 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 808 exp.Column: lambda self: self._parse_column(), 809 exp.Condition: lambda self: self._parse_assignment(), 810 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 811 exp.Expression: lambda self: self._parse_expression(), 812 exp.From: lambda self: self._parse_from(joins=True), 813 exp.GrantPrincipal: lambda self: self._parse_grant_principal(), 814 exp.GrantPrivilege: lambda self: self._parse_grant_privilege(), 815 exp.Group: lambda self: self._parse_group(), 816 exp.Having: lambda self: self._parse_having(), 817 exp.Hint: lambda self: self._parse_hint_body(), 818 exp.Identifier: lambda self: self._parse_id_var(), 819 exp.Join: lambda self: self._parse_join(), 820 exp.Lambda: lambda self: self._parse_lambda(), 821 exp.Lateral: lambda self: self._parse_lateral(), 822 exp.Limit: lambda self: self._parse_limit(), 823 exp.Offset: lambda self: self._parse_offset(), 824 exp.Order: lambda self: self._parse_order(), 825 exp.Ordered: lambda self: self._parse_ordered(), 826 exp.Properties: lambda self: self._parse_properties(), 827 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 828 exp.Qualify: lambda self: self._parse_qualify(), 829 exp.Returning: lambda self: self._parse_returning(), 830 exp.Select: lambda self: self._parse_select(), 831 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 832 exp.Table: lambda self: self._parse_table_parts(), 833 exp.TableAlias: lambda self: self._parse_table_alias(), 834 exp.Tuple: lambda self: self._parse_value(values=False), 835 exp.Whens: lambda self: 
self._parse_when_matched(), 836 exp.Where: lambda self: self._parse_where(), 837 exp.Window: lambda self: self._parse_named_window(), 838 exp.With: lambda self: self._parse_with(), 839 "JOIN_TYPE": lambda self: self._parse_join_parts(), 840 } 841 842 STATEMENT_PARSERS = { 843 TokenType.ALTER: lambda self: self._parse_alter(), 844 TokenType.ANALYZE: lambda self: self._parse_analyze(), 845 TokenType.BEGIN: lambda self: self._parse_transaction(), 846 TokenType.CACHE: lambda self: self._parse_cache(), 847 TokenType.COMMENT: lambda self: self._parse_comment(), 848 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 849 TokenType.COPY: lambda self: self._parse_copy(), 850 TokenType.CREATE: lambda self: self._parse_create(), 851 TokenType.DELETE: lambda self: self._parse_delete(), 852 TokenType.DESC: lambda self: self._parse_describe(), 853 TokenType.DESCRIBE: lambda self: self._parse_describe(), 854 TokenType.DROP: lambda self: self._parse_drop(), 855 TokenType.GRANT: lambda self: self._parse_grant(), 856 TokenType.REVOKE: lambda self: self._parse_revoke(), 857 TokenType.INSERT: lambda self: self._parse_insert(), 858 TokenType.KILL: lambda self: self._parse_kill(), 859 TokenType.LOAD: lambda self: self._parse_load(), 860 TokenType.MERGE: lambda self: self._parse_merge(), 861 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 862 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 863 TokenType.REFRESH: lambda self: self._parse_refresh(), 864 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 865 TokenType.SET: lambda self: self._parse_set(), 866 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 867 TokenType.UNCACHE: lambda self: self._parse_uncache(), 868 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 869 TokenType.UPDATE: lambda self: self._parse_update(), 870 TokenType.USE: lambda self: self._parse_use(), 871 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 872 } 873 874 UNARY_PARSERS = { 875 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 876 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 877 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 878 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 879 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 880 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 881 } 882 883 STRING_PARSERS = { 884 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 885 exp.RawString, this=token.text 886 ), 887 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 888 exp.National, this=token.text 889 ), 890 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 891 TokenType.STRING: lambda self, token: self.expression( 892 exp.Literal, this=token.text, is_string=True 893 ), 894 TokenType.UNICODE_STRING: lambda self, token: self.expression( 895 exp.UnicodeString, 896 this=token.text, 897 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 898 ), 899 } 900 901 NUMERIC_PARSERS = { 902 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 903 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 904 TokenType.HEX_STRING: lambda self, token: self.expression( 905 exp.HexString, 906 
this=token.text, 907 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 908 ), 909 TokenType.NUMBER: lambda self, token: self.expression( 910 exp.Literal, this=token.text, is_string=False 911 ), 912 } 913 914 PRIMARY_PARSERS = { 915 **STRING_PARSERS, 916 **NUMERIC_PARSERS, 917 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 918 TokenType.NULL: lambda self, _: self.expression(exp.Null), 919 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 920 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 921 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 922 TokenType.STAR: lambda self, _: self._parse_star_ops(), 923 } 924 925 PLACEHOLDER_PARSERS = { 926 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 927 TokenType.PARAMETER: lambda self: self._parse_parameter(), 928 TokenType.COLON: lambda self: ( 929 self.expression(exp.Placeholder, this=self._prev.text) 930 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 931 else None 932 ), 933 } 934 935 RANGE_PARSERS = { 936 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 937 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 938 TokenType.GLOB: binary_range_parser(exp.Glob), 939 TokenType.ILIKE: binary_range_parser(exp.ILike), 940 TokenType.IN: lambda self, this: self._parse_in(this), 941 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 942 TokenType.IS: lambda self, this: self._parse_is(this), 943 TokenType.LIKE: binary_range_parser(exp.Like), 944 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 945 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 946 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 947 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 948 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 949 TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys), 950 TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys), 951 TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath), 952 } 953 954 PIPE_SYNTAX_TRANSFORM_PARSERS = { 955 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 956 "AS": lambda self, query: self._build_pipe_cte( 957 query, [exp.Star()], self._parse_table_alias() 958 ), 959 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 960 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 961 "ORDER BY": lambda self, query: query.order_by( 962 self._parse_order(), append=False, copy=False 963 ), 964 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 965 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 966 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 967 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 968 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 969 } 970 971 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 972 "ALLOWED_VALUES": lambda self: self.expression( 973 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 974 ), 975 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 976 "AUTO": lambda self: self._parse_auto_property(), 977 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 978 "BACKUP": lambda self: self.expression( 979 exp.BackupProperty, this=self._parse_var(any_token=True) 980 ), 981 
"BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 982 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 983 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 984 "CHECKSUM": lambda self: self._parse_checksum(), 985 "CLUSTER BY": lambda self: self._parse_cluster(), 986 "CLUSTERED": lambda self: self._parse_clustered_by(), 987 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 988 exp.CollateProperty, **kwargs 989 ), 990 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 991 "CONTAINS": lambda self: self._parse_contains_property(), 992 "COPY": lambda self: self._parse_copy_property(), 993 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 994 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 995 "DEFINER": lambda self: self._parse_definer(), 996 "DETERMINISTIC": lambda self: self.expression( 997 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 998 ), 999 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 1000 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 1001 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 1002 "DISTKEY": lambda self: self._parse_distkey(), 1003 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 1004 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 1005 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 1006 "ENVIRONMENT": lambda self: self.expression( 1007 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 1008 ), 1009 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1010 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1011 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1012 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1013 "FREESPACE": lambda self: self._parse_freespace(), 1014 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1015 "HEAP": lambda self: self.expression(exp.HeapProperty), 1016 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1017 "IMMUTABLE": lambda self: self.expression( 1018 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1019 ), 1020 "INHERITS": lambda self: self.expression( 1021 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1022 ), 1023 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1024 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1025 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1026 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1027 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1028 "LIKE": lambda self: self._parse_create_like(), 1029 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1030 "LOCK": lambda self: self._parse_locking(), 1031 "LOCKING": lambda self: self._parse_locking(), 1032 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1033 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1034 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1035 "MODIFIES": lambda self: self._parse_modifies_property(), 1036 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1037 "NO": lambda self: self._parse_no_property(), 1038 
"ON": lambda self: self._parse_on_property(), 1039 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1040 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1041 "PARTITION": lambda self: self._parse_partitioned_of(), 1042 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1043 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1044 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1045 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1046 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1047 "READS": lambda self: self._parse_reads_property(), 1048 "REMOTE": lambda self: self._parse_remote_with_connection(), 1049 "RETURNS": lambda self: self._parse_returns(), 1050 "STRICT": lambda self: self.expression(exp.StrictProperty), 1051 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1052 "ROW": lambda self: self._parse_row(), 1053 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1054 "SAMPLE": lambda self: self.expression( 1055 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1056 ), 1057 "SECURE": lambda self: self.expression(exp.SecureProperty), 1058 "SECURITY": lambda self: self._parse_security(), 1059 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1060 "SETTINGS": lambda self: self._parse_settings_property(), 1061 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1062 "SORTKEY": lambda self: self._parse_sortkey(), 1063 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1064 "STABLE": lambda self: self.expression( 1065 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1066 ), 1067 "STORED": lambda self: self._parse_stored(), 1068 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1069 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1070 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1071 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1072 "TO": lambda self: self._parse_to_table(), 1073 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1074 "TRANSFORM": lambda self: self.expression( 1075 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1076 ), 1077 "TTL": lambda self: self._parse_ttl(), 1078 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1079 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1080 "VOLATILE": lambda self: self._parse_volatile_property(), 1081 "WITH": lambda self: self._parse_with_property(), 1082 } 1083 1084 CONSTRAINT_PARSERS = { 1085 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1086 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1087 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1088 "CHARACTER SET": lambda self: self.expression( 1089 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1090 ), 1091 "CHECK": lambda self: self.expression( 1092 exp.CheckColumnConstraint, 1093 this=self._parse_wrapped(self._parse_assignment), 1094 enforced=self._match_text_seq("ENFORCED"), 1095 ), 1096 "COLLATE": lambda self: self.expression( 1097 exp.CollateColumnConstraint, 1098 this=self._parse_identifier() or self._parse_column(), 1099 ), 1100 "COMMENT": lambda self: self.expression( 1101 exp.CommentColumnConstraint, this=self._parse_string() 1102 ), 1103 "COMPRESS": 
lambda self: self._parse_compress(), 1104 "CLUSTERED": lambda self: self.expression( 1105 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1106 ), 1107 "NONCLUSTERED": lambda self: self.expression( 1108 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1109 ), 1110 "DEFAULT": lambda self: self.expression( 1111 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1112 ), 1113 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1114 "EPHEMERAL": lambda self: self.expression( 1115 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1116 ), 1117 "EXCLUDE": lambda self: self.expression( 1118 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1119 ), 1120 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1121 "FORMAT": lambda self: self.expression( 1122 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1123 ), 1124 "GENERATED": lambda self: self._parse_generated_as_identity(), 1125 "IDENTITY": lambda self: self._parse_auto_increment(), 1126 "INLINE": lambda self: self._parse_inline(), 1127 "LIKE": lambda self: self._parse_create_like(), 1128 "NOT": lambda self: self._parse_not_constraint(), 1129 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1130 "ON": lambda self: ( 1131 self._match(TokenType.UPDATE) 1132 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1133 ) 1134 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1135 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1136 "PERIOD": lambda self: self._parse_period_for_system_time(), 1137 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1138 "REFERENCES": lambda self: self._parse_references(match=False), 1139 "TITLE": lambda self: self.expression( 1140 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1141 ), 1142 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1143 "UNIQUE": lambda self: self._parse_unique(), 1144 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1145 "WITH": lambda self: self.expression( 1146 exp.Properties, expressions=self._parse_wrapped_properties() 1147 ), 1148 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1149 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1150 } 1151 1152 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1153 if not self._match(TokenType.L_PAREN, advance=False): 1154 # Partitioning by bucket or truncate follows the syntax: 1155 # PARTITION BY (BUCKET(..) 
| TRUNCATE(..)) 1156 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1157 self._retreat(self._index - 1) 1158 return None 1159 1160 klass = ( 1161 exp.PartitionedByBucket 1162 if self._prev.text.upper() == "BUCKET" 1163 else exp.PartitionByTruncate 1164 ) 1165 1166 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1167 this, expression = seq_get(args, 0), seq_get(args, 1) 1168 1169 if isinstance(this, exp.Literal): 1170 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1171 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1172 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1173 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1174 # 1175 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1176 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1177 this, expression = expression, this 1178 1179 return self.expression(klass, this=this, expression=expression) 1180 1181 ALTER_PARSERS = { 1182 "ADD": lambda self: self._parse_alter_table_add(), 1183 "AS": lambda self: self._parse_select(), 1184 "ALTER": lambda self: self._parse_alter_table_alter(), 1185 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1186 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1187 "DROP": lambda self: self._parse_alter_table_drop(), 1188 "RENAME": lambda self: self._parse_alter_table_rename(), 1189 "SET": lambda self: self._parse_alter_table_set(), 1190 "SWAP": lambda self: self.expression( 1191 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1192 ), 1193 } 1194 1195 ALTER_ALTER_PARSERS = { 1196 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1197 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1198 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1199 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1200 } 1201 1202 SCHEMA_UNNAMED_CONSTRAINTS = { 1203 "CHECK", 1204 "EXCLUDE", 1205 "FOREIGN KEY", 1206 "LIKE", 1207 "PERIOD", 1208 "PRIMARY KEY", 1209 "UNIQUE", 1210 "BUCKET", 1211 "TRUNCATE", 1212 } 1213 1214 NO_PAREN_FUNCTION_PARSERS = { 1215 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1216 "CASE": lambda self: self._parse_case(), 1217 "CONNECT_BY_ROOT": lambda self: self.expression( 1218 exp.ConnectByRoot, this=self._parse_column() 1219 ), 1220 "IF": lambda self: self._parse_if(), 1221 } 1222 1223 INVALID_FUNC_NAME_TOKENS = { 1224 TokenType.IDENTIFIER, 1225 TokenType.STRING, 1226 } 1227 1228 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1229 1230 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1231 1232 FUNCTION_PARSERS = { 1233 **{ 1234 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1235 }, 1236 **{ 1237 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1238 }, 1239 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1240 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1241 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1242 "DECODE": lambda self: self._parse_decode(), 1243 "EXTRACT": lambda self: self._parse_extract(), 1244 "FLOOR": lambda 
self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
    QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS)

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.RANGE, TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}
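    # Illustrative note, not part of the original source: the token sets and
    # OPTIONS_TYPE tables above are the knobs that dialect-specific Parser
    # subclasses override, and they are consumed by helpers such as
    # _parse_var_from_options further below. In practice the parser is usually
    # reached through a Dialect rather than instantiated directly. A minimal,
    # hedged sketch using the public sqlglot API (output shape abbreviated):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ast = sqlglot.parse_one("SELECT a FROM t LIMIT 1", read="duckdb")
    #     >>> isinstance(ast, exp.Select)
    #     True
    #
    # Each dialect typically ships its own Parser subclass (see sqlglot.dialects)
    # that tweaks these class-level tables and flags rather than the core methods.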
1431 1432 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1433 1434 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1435 1436 ODBC_DATETIME_LITERALS = { 1437 "d": exp.Date, 1438 "t": exp.Time, 1439 "ts": exp.Timestamp, 1440 } 1441 1442 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1443 1444 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1445 1446 # The style options for the DESCRIBE statement 1447 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1448 1449 # The style options for the ANALYZE statement 1450 ANALYZE_STYLES = { 1451 "BUFFER_USAGE_LIMIT", 1452 "FULL", 1453 "LOCAL", 1454 "NO_WRITE_TO_BINLOG", 1455 "SAMPLE", 1456 "SKIP_LOCKED", 1457 "VERBOSE", 1458 } 1459 1460 ANALYZE_EXPRESSION_PARSERS = { 1461 "ALL": lambda self: self._parse_analyze_columns(), 1462 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1463 "DELETE": lambda self: self._parse_analyze_delete(), 1464 "DROP": lambda self: self._parse_analyze_histogram(), 1465 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1466 "LIST": lambda self: self._parse_analyze_list(), 1467 "PREDICATE": lambda self: self._parse_analyze_columns(), 1468 "UPDATE": lambda self: self._parse_analyze_histogram(), 1469 "VALIDATE": lambda self: self._parse_analyze_validate(), 1470 } 1471 1472 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1473 1474 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1475 1476 OPERATION_MODIFIERS: t.Set[str] = set() 1477 1478 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1479 1480 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1481 1482 STRICT_CAST = True 1483 1484 PREFIXED_PIVOT_COLUMNS = False 1485 IDENTIFY_PIVOT_STRINGS = False 1486 1487 LOG_DEFAULTS_TO_LN = False 1488 1489 # Whether the table sample clause expects CSV syntax 1490 TABLESAMPLE_CSV = False 1491 1492 # The default method used for table sampling 1493 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1494 1495 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1496 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1497 1498 # Whether the TRIM function expects the characters to trim as its first argument 1499 TRIM_PATTERN_FIRST = False 1500 1501 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1502 STRING_ALIASES = False 1503 1504 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1505 MODIFIERS_ATTACHED_TO_SET_OP = True 1506 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1507 1508 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1509 NO_PAREN_IF_COMMANDS = True 1510 1511 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1512 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1513 1514 # Whether the `:` operator is used to extract a value from a VARIANT column 1515 COLON_IS_VARIANT_EXTRACT = False 1516 1517 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1518 # If this is True and '(' is not found, the keyword will be treated as an identifier 1519 VALUES_FOLLOWED_BY_PAREN = True 1520 1521 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1522 SUPPORTS_IMPLICIT_UNNEST = False 1523 1524 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1525 INTERVAL_SPANS = True 1526 1527 # Whether a PARTITION clause can follow a table reference 1528 SUPPORTS_PARTITION_SELECTION = False 1529 1530 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1531 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1532 1533 # Whether the 'AS' keyword is optional in the CTE definition syntax 1534 OPTIONAL_ALIAS_TOKEN_CTE = True 1535 1536 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1537 ALTER_RENAME_REQUIRES_COLUMN = True 1538 1539 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1540 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1541 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1542 # as BigQuery, where all joins have the same precedence. 1543 JOINS_HAVE_EQUAL_PRECEDENCE = False 1544 1545 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1546 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1547 1548 # Whether map literals support arbitrary expressions as keys. 1549 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1550 # When False, keys are typically restricted to identifiers. 1551 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False 1552 1553 # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this 1554 # is true for Snowflake but not for BigQuery which can also process strings 1555 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False 1556 1557 # Dialects like Databricks support JOINS without join criteria 1558 # Adding an ON TRUE, makes transpilation semantically correct for other dialects 1559 ADD_JOIN_ON_TRUE = False 1560 1561 # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]' 1562 # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND` 1563 SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = False 1564 1565 __slots__ = ( 1566 "error_level", 1567 "error_message_context", 1568 "max_errors", 1569 "dialect", 1570 "sql", 1571 "errors", 1572 "_tokens", 1573 "_index", 1574 "_curr", 1575 "_next", 1576 "_prev", 1577 "_prev_comments", 1578 "_pipe_cte_counter", 1579 ) 1580 1581 # Autofilled 1582 SHOW_TRIE: t.Dict = {} 1583 SET_TRIE: t.Dict = {} 1584 1585 def __init__( 1586 self, 1587 error_level: t.Optional[ErrorLevel] = None, 1588 error_message_context: int = 100, 1589 max_errors: int = 3, 1590 dialect: DialectType = None, 1591 ): 1592 from sqlglot.dialects import Dialect 1593 1594 self.error_level = error_level or ErrorLevel.IMMEDIATE 1595 self.error_message_context = error_message_context 1596 self.max_errors = max_errors 1597 self.dialect = Dialect.get_or_raise(dialect) 1598 self.reset() 1599 1600 def reset(self): 1601 self.sql = "" 1602 self.errors = [] 1603 self._tokens = [] 1604 self._index = 0 1605 self._curr = None 1606 self._next = None 1607 self._prev = None 1608 self._prev_comments = None 1609 self._pipe_cte_counter = 0 1610 1611 def parse( 1612 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1613 ) -> t.List[t.Optional[exp.Expression]]: 1614 """ 1615 Parses a list of tokens and returns a list of syntax trees, one tree 1616 per parsed SQL statement. 1617 1618 Args: 1619 raw_tokens: The list of tokens. 
1620 sql: The original SQL string, used to produce helpful debug messages. 1621 1622 Returns: 1623 The list of the produced syntax trees. 1624 """ 1625 return self._parse( 1626 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1627 ) 1628 1629 def parse_into( 1630 self, 1631 expression_types: exp.IntoType, 1632 raw_tokens: t.List[Token], 1633 sql: t.Optional[str] = None, 1634 ) -> t.List[t.Optional[exp.Expression]]: 1635 """ 1636 Parses a list of tokens into a given Expression type. If a collection of Expression 1637 types is given instead, this method will try to parse the token list into each one 1638 of them, stopping at the first for which the parsing succeeds. 1639 1640 Args: 1641 expression_types: The expression type(s) to try and parse the token list into. 1642 raw_tokens: The list of tokens. 1643 sql: The original SQL string, used to produce helpful debug messages. 1644 1645 Returns: 1646 The target Expression. 1647 """ 1648 errors = [] 1649 for expression_type in ensure_list(expression_types): 1650 parser = self.EXPRESSION_PARSERS.get(expression_type) 1651 if not parser: 1652 raise TypeError(f"No parser registered for {expression_type}") 1653 1654 try: 1655 return self._parse(parser, raw_tokens, sql) 1656 except ParseError as e: 1657 e.errors[0]["into_expression"] = expression_type 1658 errors.append(e) 1659 1660 raise ParseError( 1661 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1662 errors=merge_errors(errors), 1663 ) from errors[-1] 1664 1665 def _parse( 1666 self, 1667 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1668 raw_tokens: t.List[Token], 1669 sql: t.Optional[str] = None, 1670 ) -> t.List[t.Optional[exp.Expression]]: 1671 self.reset() 1672 self.sql = sql or "" 1673 1674 total = len(raw_tokens) 1675 chunks: t.List[t.List[Token]] = [[]] 1676 1677 for i, token in enumerate(raw_tokens): 1678 if token.token_type == TokenType.SEMICOLON: 1679 if token.comments: 1680 chunks.append([token]) 1681 1682 if i < total - 1: 1683 chunks.append([]) 1684 else: 1685 chunks[-1].append(token) 1686 1687 expressions = [] 1688 1689 for tokens in chunks: 1690 self._index = -1 1691 self._tokens = tokens 1692 self._advance() 1693 1694 expressions.append(parse_method(self)) 1695 1696 if self._index < len(self._tokens): 1697 self.raise_error("Invalid expression / Unexpected token") 1698 1699 self.check_errors() 1700 1701 return expressions 1702 1703 def check_errors(self) -> None: 1704 """Logs or raises any found errors, depending on the chosen error level setting.""" 1705 if self.error_level == ErrorLevel.WARN: 1706 for error in self.errors: 1707 logger.error(str(error)) 1708 elif self.error_level == ErrorLevel.RAISE and self.errors: 1709 raise ParseError( 1710 concat_messages(self.errors, self.max_errors), 1711 errors=merge_errors(self.errors), 1712 ) 1713 1714 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1715 """ 1716 Appends an error in the list of recorded errors or raises it, depending on the chosen 1717 error level setting. 1718 """ 1719 token = token or self._curr or self._prev or Token.string("") 1720 start = token.start 1721 end = token.end + 1 1722 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1723 highlight = self.sql[start:end] 1724 end_context = self.sql[end : end + self.error_message_context] 1725 1726 error = ParseError.new( 1727 f"{message}. 
Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return
self._parse_query_modifiers(expression) 1937 1938 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1939 start = self._prev 1940 temporary = self._match(TokenType.TEMPORARY) 1941 materialized = self._match_text_seq("MATERIALIZED") 1942 1943 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1944 if not kind: 1945 return self._parse_as_command(start) 1946 1947 concurrently = self._match_text_seq("CONCURRENTLY") 1948 if_exists = exists or self._parse_exists() 1949 1950 if kind == "COLUMN": 1951 this = self._parse_column() 1952 else: 1953 this = self._parse_table_parts( 1954 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1955 ) 1956 1957 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1958 1959 if self._match(TokenType.L_PAREN, advance=False): 1960 expressions = self._parse_wrapped_csv(self._parse_types) 1961 else: 1962 expressions = None 1963 1964 return self.expression( 1965 exp.Drop, 1966 exists=if_exists, 1967 this=this, 1968 expressions=expressions, 1969 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1970 temporary=temporary, 1971 materialized=materialized, 1972 cascade=self._match_text_seq("CASCADE"), 1973 constraints=self._match_text_seq("CONSTRAINTS"), 1974 purge=self._match_text_seq("PURGE"), 1975 cluster=cluster, 1976 concurrently=concurrently, 1977 ) 1978 1979 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1980 return ( 1981 self._match_text_seq("IF") 1982 and (not not_ or self._match(TokenType.NOT)) 1983 and self._match(TokenType.EXISTS) 1984 ) 1985 1986 def _parse_create(self) -> exp.Create | exp.Command: 1987 # Note: this can't be None because we've matched a statement parser 1988 start = self._prev 1989 1990 replace = ( 1991 start.token_type == TokenType.REPLACE 1992 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1993 or self._match_pair(TokenType.OR, TokenType.ALTER) 1994 ) 1995 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1996 1997 unique = self._match(TokenType.UNIQUE) 1998 1999 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 2000 clustered = True 2001 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2002 "COLUMNSTORE" 2003 ): 2004 clustered = False 2005 else: 2006 clustered = None 2007 2008 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2009 self._advance() 2010 2011 properties = None 2012 create_token = self._match_set(self.CREATABLES) and self._prev 2013 2014 if not create_token: 2015 # exp.Properties.Location.POST_CREATE 2016 properties = self._parse_properties() 2017 create_token = self._match_set(self.CREATABLES) and self._prev 2018 2019 if not properties or not create_token: 2020 return self._parse_as_command(start) 2021 2022 concurrently = self._match_text_seq("CONCURRENTLY") 2023 exists = self._parse_exists(not_=True) 2024 this = None 2025 expression: t.Optional[exp.Expression] = None 2026 indexes = None 2027 no_schema_binding = None 2028 begin = None 2029 end = None 2030 clone = None 2031 2032 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2033 nonlocal properties 2034 if properties and temp_props: 2035 properties.expressions.extend(temp_props.expressions) 2036 elif temp_props: 2037 properties = temp_props 2038 2039 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2040 this = self._parse_user_defined_function(kind=create_token.token_type) 2041 2042 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2043 extend_props(self._parse_properties()) 2044 2045 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2046 extend_props(self._parse_properties()) 2047 2048 if not expression: 2049 if self._match(TokenType.COMMAND): 2050 expression = self._parse_as_command(self._prev) 2051 else: 2052 begin = self._match(TokenType.BEGIN) 2053 return_ = self._match_text_seq("RETURN") 2054 2055 if self._match(TokenType.STRING, advance=False): 2056 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2057 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2058 expression = self._parse_string() 2059 extend_props(self._parse_properties()) 2060 else: 2061 expression = self._parse_user_defined_function_expression() 2062 2063 end = self._match_text_seq("END") 2064 2065 if return_: 2066 expression = self.expression(exp.Return, this=expression) 2067 elif create_token.token_type == TokenType.INDEX: 2068 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2069 if not self._match(TokenType.ON): 2070 index = self._parse_id_var() 2071 anonymous = False 2072 else: 2073 index = None 2074 anonymous = True 2075 2076 this = self._parse_index(index=index, anonymous=anonymous) 2077 elif create_token.token_type in self.DB_CREATABLES: 2078 table_parts = self._parse_table_parts( 2079 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2080 ) 2081 2082 # exp.Properties.Location.POST_NAME 2083 self._match(TokenType.COMMA) 2084 extend_props(self._parse_properties(before=True)) 2085 2086 this = self._parse_schema(this=table_parts) 2087 2088 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2089 extend_props(self._parse_properties()) 2090 2091 has_alias = self._match(TokenType.ALIAS) 2092 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2093 # exp.Properties.Location.POST_ALIAS 2094 extend_props(self._parse_properties()) 2095 2096 if create_token.token_type == TokenType.SEQUENCE: 2097 expression = self._parse_types() 2098 props = self._parse_properties() 2099 if props: 2100 sequence_props = exp.SequenceProperties() 2101 options = [] 2102 for prop in props: 2103 if isinstance(prop, exp.SequenceProperties): 2104 for arg, value in prop.args.items(): 2105 if arg == "options": 2106 options.extend(value) 2107 else: 2108 sequence_props.set(arg, value) 2109 prop.pop() 2110 2111 if options: 2112 sequence_props.set("options", options) 2113 2114 props.append("expressions", sequence_props) 2115 extend_props(props) 2116 else: 2117 expression = self._parse_ddl_select() 2118 2119 # Some dialects also support using a table as an alias instead of a SELECT. 2120 # Here we fallback to this as an alternative. 
2121 if not expression and has_alias: 2122 expression = self._try_parse(self._parse_table_parts) 2123 2124 if create_token.token_type == TokenType.TABLE: 2125 # exp.Properties.Location.POST_EXPRESSION 2126 extend_props(self._parse_properties()) 2127 2128 indexes = [] 2129 while True: 2130 index = self._parse_index() 2131 2132 # exp.Properties.Location.POST_INDEX 2133 extend_props(self._parse_properties()) 2134 if not index: 2135 break 2136 else: 2137 self._match(TokenType.COMMA) 2138 indexes.append(index) 2139 elif create_token.token_type == TokenType.VIEW: 2140 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2141 no_schema_binding = True 2142 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2143 extend_props(self._parse_properties()) 2144 2145 shallow = self._match_text_seq("SHALLOW") 2146 2147 if self._match_texts(self.CLONE_KEYWORDS): 2148 copy = self._prev.text.lower() == "copy" 2149 clone = self.expression( 2150 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2151 ) 2152 2153 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2154 return self._parse_as_command(start) 2155 2156 create_kind_text = create_token.text.upper() 2157 return self.expression( 2158 exp.Create, 2159 this=this, 2160 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2161 replace=replace, 2162 refresh=refresh, 2163 unique=unique, 2164 expression=expression, 2165 exists=exists, 2166 properties=properties, 2167 indexes=indexes, 2168 no_schema_binding=no_schema_binding, 2169 begin=begin, 2170 end=end, 2171 clone=clone, 2172 concurrently=concurrently, 2173 clustered=clustered, 2174 ) 2175 2176 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2177 seq = exp.SequenceProperties() 2178 2179 options = [] 2180 index = self._index 2181 2182 while self._curr: 2183 self._match(TokenType.COMMA) 2184 if self._match_text_seq("INCREMENT"): 2185 self._match_text_seq("BY") 2186 self._match_text_seq("=") 2187 seq.set("increment", self._parse_term()) 2188 elif self._match_text_seq("MINVALUE"): 2189 seq.set("minvalue", self._parse_term()) 2190 elif self._match_text_seq("MAXVALUE"): 2191 seq.set("maxvalue", self._parse_term()) 2192 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2193 self._match_text_seq("=") 2194 seq.set("start", self._parse_term()) 2195 elif self._match_text_seq("CACHE"): 2196 # T-SQL allows empty CACHE which is initialized dynamically 2197 seq.set("cache", self._parse_number() or True) 2198 elif self._match_text_seq("OWNED", "BY"): 2199 # "OWNED BY NONE" is the default 2200 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2201 else: 2202 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2203 if opt: 2204 options.append(opt) 2205 else: 2206 break 2207 2208 seq.set("options", options if options else None) 2209 return None if self._index == index else seq 2210 2211 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2212 # only used for teradata currently 2213 self._match(TokenType.COMMA) 2214 2215 kwargs = { 2216 "no": self._match_text_seq("NO"), 2217 "dual": self._match_text_seq("DUAL"), 2218 "before": self._match_text_seq("BEFORE"), 2219 "default": self._match_text_seq("DEFAULT"), 2220 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2221 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2222 "after": self._match_text_seq("AFTER"), 2223 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2224 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2225 } 2226 2227 if self._match_texts(self.PROPERTY_PARSERS): 2228 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2229 try: 2230 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2231 except TypeError: 2232 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2233 2234 return None 2235 2236 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2237 return self._parse_wrapped_csv(self._parse_property) 2238 2239 def _parse_property(self) -> t.Optional[exp.Expression]: 2240 if self._match_texts(self.PROPERTY_PARSERS): 2241 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2242 2243 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2244 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2245 2246 if self._match_text_seq("COMPOUND", "SORTKEY"): 2247 return self._parse_sortkey(compound=True) 2248 2249 if self._match_text_seq("SQL", "SECURITY"): 2250 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2251 2252 index = self._index 2253 2254 seq_props = self._parse_sequence_properties() 2255 if seq_props: 2256 return seq_props 2257 2258 self._retreat(index) 2259 key = self._parse_column() 2260 2261 if not self._match(TokenType.EQ): 2262 self._retreat(index) 2263 return None 2264 2265 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2266 if isinstance(key, exp.Column): 2267 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2268 2269 value = self._parse_bitwise() or self._parse_var(any_token=True) 2270 2271 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2272 if isinstance(value, exp.Column): 2273 value = exp.var(value.name) 2274 2275 return self.expression(exp.Property, this=key, value=value) 2276 2277 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2278 if self._match_text_seq("BY"): 2279 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2280 2281 self._match(TokenType.ALIAS) 2282 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2283 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2284 2285 return self.expression( 2286 exp.FileFormatProperty, 2287 this=( 2288 self.expression( 2289 exp.InputOutputFormat, 2290 input_format=input_format, 2291 output_format=output_format, 2292 ) 2293 if input_format or output_format 2294 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2295 ), 2296 hive_format=True, 2297 ) 2298 2299 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2300 field = self._parse_field() 2301 if isinstance(field, exp.Identifier) and not field.quoted: 2302 field = exp.var(field) 2303 2304 return field 2305 2306 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2307 self._match(TokenType.EQ) 2308 self._match(TokenType.ALIAS) 2309 2310 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2311 2312 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2313 properties = [] 2314 while True: 2315 if before: 2316 prop = self._parse_property_before() 2317 else: 2318 prop = self._parse_property() 2319 if not prop: 2320 break 2321 for p in ensure_list(prop): 2322 properties.append(p) 
2323 2324 if properties: 2325 return self.expression(exp.Properties, expressions=properties) 2326 2327 return None 2328 2329 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2330 return self.expression( 2331 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2332 ) 2333 2334 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2335 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2336 security_specifier = self._prev.text.upper() 2337 return self.expression(exp.SecurityProperty, this=security_specifier) 2338 return None 2339 2340 def _parse_settings_property(self) -> exp.SettingsProperty: 2341 return self.expression( 2342 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2343 ) 2344 2345 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2346 if self._index >= 2: 2347 pre_volatile_token = self._tokens[self._index - 2] 2348 else: 2349 pre_volatile_token = None 2350 2351 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2352 return exp.VolatileProperty() 2353 2354 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2355 2356 def _parse_retention_period(self) -> exp.Var: 2357 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2358 number = self._parse_number() 2359 number_str = f"{number} " if number else "" 2360 unit = self._parse_var(any_token=True) 2361 return exp.var(f"{number_str}{unit}") 2362 2363 def _parse_system_versioning_property( 2364 self, with_: bool = False 2365 ) -> exp.WithSystemVersioningProperty: 2366 self._match(TokenType.EQ) 2367 prop = self.expression( 2368 exp.WithSystemVersioningProperty, 2369 **{ # type: ignore 2370 "on": True, 2371 "with": with_, 2372 }, 2373 ) 2374 2375 if self._match_text_seq("OFF"): 2376 prop.set("on", False) 2377 return prop 2378 2379 self._match(TokenType.ON) 2380 if self._match(TokenType.L_PAREN): 2381 while self._curr and not self._match(TokenType.R_PAREN): 2382 if self._match_text_seq("HISTORY_TABLE", "="): 2383 prop.set("this", self._parse_table_parts()) 2384 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2385 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2386 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2387 prop.set("retention_period", self._parse_retention_period()) 2388 2389 self._match(TokenType.COMMA) 2390 2391 return prop 2392 2393 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2394 self._match(TokenType.EQ) 2395 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2396 prop = self.expression(exp.DataDeletionProperty, on=on) 2397 2398 if self._match(TokenType.L_PAREN): 2399 while self._curr and not self._match(TokenType.R_PAREN): 2400 if self._match_text_seq("FILTER_COLUMN", "="): 2401 prop.set("filter_column", self._parse_column()) 2402 elif self._match_text_seq("RETENTION_PERIOD", "="): 2403 prop.set("retention_period", self._parse_retention_period()) 2404 2405 self._match(TokenType.COMMA) 2406 2407 return prop 2408 2409 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2410 kind = "HASH" 2411 expressions: t.Optional[t.List[exp.Expression]] = None 2412 if self._match_text_seq("BY", "HASH"): 2413 expressions = self._parse_wrapped_csv(self._parse_id_var) 2414 elif self._match_text_seq("BY", "RANDOM"): 2415 kind = "RANDOM" 2416 2417 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2418 
buckets: t.Optional[exp.Expression] = None 2419 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2420 buckets = self._parse_number() 2421 2422 return self.expression( 2423 exp.DistributedByProperty, 2424 expressions=expressions, 2425 kind=kind, 2426 buckets=buckets, 2427 order=self._parse_order(), 2428 ) 2429 2430 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2431 self._match_text_seq("KEY") 2432 expressions = self._parse_wrapped_id_vars() 2433 return self.expression(expr_type, expressions=expressions) 2434 2435 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2436 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2437 prop = self._parse_system_versioning_property(with_=True) 2438 self._match_r_paren() 2439 return prop 2440 2441 if self._match(TokenType.L_PAREN, advance=False): 2442 return self._parse_wrapped_properties() 2443 2444 if self._match_text_seq("JOURNAL"): 2445 return self._parse_withjournaltable() 2446 2447 if self._match_texts(self.VIEW_ATTRIBUTES): 2448 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2449 2450 if self._match_text_seq("DATA"): 2451 return self._parse_withdata(no=False) 2452 elif self._match_text_seq("NO", "DATA"): 2453 return self._parse_withdata(no=True) 2454 2455 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2456 return self._parse_serde_properties(with_=True) 2457 2458 if self._match(TokenType.SCHEMA): 2459 return self.expression( 2460 exp.WithSchemaBindingProperty, 2461 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2462 ) 2463 2464 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2465 return self.expression( 2466 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2467 ) 2468 2469 if not self._next: 2470 return None 2471 2472 return self._parse_withisolatedloading() 2473 2474 def _parse_procedure_option(self) -> exp.Expression | None: 2475 if self._match_text_seq("EXECUTE", "AS"): 2476 return self.expression( 2477 exp.ExecuteAsProperty, 2478 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2479 or self._parse_string(), 2480 ) 2481 2482 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2483 2484 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2485 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2486 self._match(TokenType.EQ) 2487 2488 user = self._parse_id_var() 2489 self._match(TokenType.PARAMETER) 2490 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2491 2492 if not user or not host: 2493 return None 2494 2495 return exp.DefinerProperty(this=f"{user}@{host}") 2496 2497 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2498 self._match(TokenType.TABLE) 2499 self._match(TokenType.EQ) 2500 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2501 2502 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2503 return self.expression(exp.LogProperty, no=no) 2504 2505 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2506 return self.expression(exp.JournalProperty, **kwargs) 2507 2508 def _parse_checksum(self) -> exp.ChecksumProperty: 2509 self._match(TokenType.EQ) 2510 2511 on = None 2512 if self._match(TokenType.ON): 2513 on = True 2514 elif self._match_text_seq("OFF"): 2515 on = False 2516 2517 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2518 2519 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2520 return self.expression( 2521 exp.Cluster, 2522 expressions=( 2523 self._parse_wrapped_csv(self._parse_ordered) 2524 if wrapped 2525 else self._parse_csv(self._parse_ordered) 2526 ), 2527 ) 2528 2529 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2530 self._match_text_seq("BY") 2531 2532 self._match_l_paren() 2533 expressions = self._parse_csv(self._parse_column) 2534 self._match_r_paren() 2535 2536 if self._match_text_seq("SORTED", "BY"): 2537 self._match_l_paren() 2538 sorted_by = self._parse_csv(self._parse_ordered) 2539 self._match_r_paren() 2540 else: 2541 sorted_by = None 2542 2543 self._match(TokenType.INTO) 2544 buckets = self._parse_number() 2545 self._match_text_seq("BUCKETS") 2546 2547 return self.expression( 2548 exp.ClusteredByProperty, 2549 expressions=expressions, 2550 sorted_by=sorted_by, 2551 buckets=buckets, 2552 ) 2553 2554 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2555 if not self._match_text_seq("GRANTS"): 2556 self._retreat(self._index - 1) 2557 return None 2558 2559 return self.expression(exp.CopyGrantsProperty) 2560 2561 def _parse_freespace(self) -> exp.FreespaceProperty: 2562 self._match(TokenType.EQ) 2563 return self.expression( 2564 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2565 ) 2566 2567 def _parse_mergeblockratio( 2568 self, no: bool = False, default: bool = False 2569 ) -> exp.MergeBlockRatioProperty: 2570 if self._match(TokenType.EQ): 2571 return self.expression( 2572 exp.MergeBlockRatioProperty, 2573 this=self._parse_number(), 2574 percent=self._match(TokenType.PERCENT), 2575 ) 2576 2577 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2578 2579 def _parse_datablocksize( 2580 self, 2581 default: t.Optional[bool] = None, 2582 minimum: t.Optional[bool] = None, 2583 maximum: t.Optional[bool] = None, 2584 ) -> exp.DataBlocksizeProperty: 2585 self._match(TokenType.EQ) 2586 size = self._parse_number() 2587 2588 units = None 2589 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2590 units = self._prev.text 2591 2592 return self.expression( 2593 exp.DataBlocksizeProperty, 2594 size=size, 2595 units=units, 2596 default=default, 2597 minimum=minimum, 2598 maximum=maximum, 2599 ) 2600 2601 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2602 self._match(TokenType.EQ) 2603 always = self._match_text_seq("ALWAYS") 2604 manual = self._match_text_seq("MANUAL") 2605 never = self._match_text_seq("NEVER") 2606 default = self._match_text_seq("DEFAULT") 2607 2608 autotemp = None 2609 if self._match_text_seq("AUTOTEMP"): 2610 autotemp = self._parse_schema() 2611 2612 return self.expression( 2613 exp.BlockCompressionProperty, 2614 always=always, 2615 manual=manual, 2616 never=never, 2617 default=default, 2618 autotemp=autotemp, 2619 ) 2620 2621 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2622 index = self._index 2623 no = self._match_text_seq("NO") 2624 concurrent = self._match_text_seq("CONCURRENT") 2625 2626 if not self._match_text_seq("ISOLATED", "LOADING"): 2627 self._retreat(index) 2628 return None 2629 2630 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2631 return self.expression( 2632 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2633 ) 2634 2635 def _parse_locking(self) -> exp.LockingProperty: 2636 if self._match(TokenType.TABLE): 2637 kind = "TABLE" 2638 elif 
self._match(TokenType.VIEW): 2639 kind = "VIEW" 2640 elif self._match(TokenType.ROW): 2641 kind = "ROW" 2642 elif self._match_text_seq("DATABASE"): 2643 kind = "DATABASE" 2644 else: 2645 kind = None 2646 2647 if kind in ("DATABASE", "TABLE", "VIEW"): 2648 this = self._parse_table_parts() 2649 else: 2650 this = None 2651 2652 if self._match(TokenType.FOR): 2653 for_or_in = "FOR" 2654 elif self._match(TokenType.IN): 2655 for_or_in = "IN" 2656 else: 2657 for_or_in = None 2658 2659 if self._match_text_seq("ACCESS"): 2660 lock_type = "ACCESS" 2661 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2662 lock_type = "EXCLUSIVE" 2663 elif self._match_text_seq("SHARE"): 2664 lock_type = "SHARE" 2665 elif self._match_text_seq("READ"): 2666 lock_type = "READ" 2667 elif self._match_text_seq("WRITE"): 2668 lock_type = "WRITE" 2669 elif self._match_text_seq("CHECKSUM"): 2670 lock_type = "CHECKSUM" 2671 else: 2672 lock_type = None 2673 2674 override = self._match_text_seq("OVERRIDE") 2675 2676 return self.expression( 2677 exp.LockingProperty, 2678 this=this, 2679 kind=kind, 2680 for_or_in=for_or_in, 2681 lock_type=lock_type, 2682 override=override, 2683 ) 2684 2685 def _parse_partition_by(self) -> t.List[exp.Expression]: 2686 if self._match(TokenType.PARTITION_BY): 2687 return self._parse_csv(self._parse_assignment) 2688 return [] 2689 2690 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2691 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2692 if self._match_text_seq("MINVALUE"): 2693 return exp.var("MINVALUE") 2694 if self._match_text_seq("MAXVALUE"): 2695 return exp.var("MAXVALUE") 2696 return self._parse_bitwise() 2697 2698 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2699 expression = None 2700 from_expressions = None 2701 to_expressions = None 2702 2703 if self._match(TokenType.IN): 2704 this = self._parse_wrapped_csv(self._parse_bitwise) 2705 elif self._match(TokenType.FROM): 2706 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2707 self._match_text_seq("TO") 2708 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2709 elif self._match_text_seq("WITH", "(", "MODULUS"): 2710 this = self._parse_number() 2711 self._match_text_seq(",", "REMAINDER") 2712 expression = self._parse_number() 2713 self._match_r_paren() 2714 else: 2715 self.raise_error("Failed to parse partition bound spec.") 2716 2717 return self.expression( 2718 exp.PartitionBoundSpec, 2719 this=this, 2720 expression=expression, 2721 from_expressions=from_expressions, 2722 to_expressions=to_expressions, 2723 ) 2724 2725 # https://www.postgresql.org/docs/current/sql-createtable.html 2726 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2727 if not self._match_text_seq("OF"): 2728 self._retreat(self._index - 1) 2729 return None 2730 2731 this = self._parse_table(schema=True) 2732 2733 if self._match(TokenType.DEFAULT): 2734 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2735 elif self._match_text_seq("FOR", "VALUES"): 2736 expression = self._parse_partition_bound_spec() 2737 else: 2738 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2739 2740 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2741 2742 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2743 self._match(TokenType.EQ) 2744 return self.expression( 2745 exp.PartitionedByProperty, 2746 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2747 ) 2748 2749 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2750 if self._match_text_seq("AND", "STATISTICS"): 2751 statistics = True 2752 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2753 statistics = False 2754 else: 2755 statistics = None 2756 2757 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2758 2759 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2760 if self._match_text_seq("SQL"): 2761 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2762 return None 2763 2764 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2765 if self._match_text_seq("SQL", "DATA"): 2766 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2767 return None 2768 2769 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2770 if self._match_text_seq("PRIMARY", "INDEX"): 2771 return exp.NoPrimaryIndexProperty() 2772 if self._match_text_seq("SQL"): 2773 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2774 return None 2775 2776 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2777 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2778 return exp.OnCommitProperty() 2779 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2780 return exp.OnCommitProperty(delete=True) 2781 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2782 2783 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2784 if self._match_text_seq("SQL", "DATA"): 2785 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2786 return None 2787 2788 def _parse_distkey(self) -> exp.DistKeyProperty: 2789 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2790 2791 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2792 table = self._parse_table(schema=True) 2793 2794 options = [] 2795 while self._match_texts(("INCLUDING", "EXCLUDING")): 2796 this = self._prev.text.upper() 2797 2798 id_var = self._parse_id_var() 2799 if not id_var: 2800 return None 2801 2802 options.append( 2803 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2804 ) 2805 2806 return self.expression(exp.LikeProperty, this=table, expressions=options) 2807 2808 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2809 return self.expression( 2810 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2811 ) 2812 2813 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2814 self._match(TokenType.EQ) 2815 return self.expression( 2816 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2817 ) 2818 2819 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2820 self._match_text_seq("WITH", "CONNECTION") 2821 return self.expression( 2822 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2823 ) 2824 2825 def _parse_returns(self) -> exp.ReturnsProperty: 2826 value: t.Optional[exp.Expression] 2827 null = None 2828 is_table = self._match(TokenType.TABLE) 2829 2830 if is_table: 2831 if self._match(TokenType.LT): 2832 value = self.expression( 2833 exp.Schema, 2834 this="TABLE", 2835 expressions=self._parse_csv(self._parse_struct_types), 2836 ) 2837 if not self._match(TokenType.GT): 2838 self.raise_error("Expecting >") 2839 else: 2840 value = self._parse_schema(exp.var("TABLE")) 2841 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
2842 null = True 2843 value = None 2844 else: 2845 value = self._parse_types() 2846 2847 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2848 2849 def _parse_describe(self) -> exp.Describe: 2850 kind = self._match_set(self.CREATABLES) and self._prev.text 2851 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2852 if self._match(TokenType.DOT): 2853 style = None 2854 self._retreat(self._index - 2) 2855 2856 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2857 2858 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2859 this = self._parse_statement() 2860 else: 2861 this = self._parse_table(schema=True) 2862 2863 properties = self._parse_properties() 2864 expressions = properties.expressions if properties else None 2865 partition = self._parse_partition() 2866 return self.expression( 2867 exp.Describe, 2868 this=this, 2869 style=style, 2870 kind=kind, 2871 expressions=expressions, 2872 partition=partition, 2873 format=format, 2874 ) 2875 2876 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2877 kind = self._prev.text.upper() 2878 expressions = [] 2879 2880 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2881 if self._match(TokenType.WHEN): 2882 expression = self._parse_disjunction() 2883 self._match(TokenType.THEN) 2884 else: 2885 expression = None 2886 2887 else_ = self._match(TokenType.ELSE) 2888 2889 if not self._match(TokenType.INTO): 2890 return None 2891 2892 return self.expression( 2893 exp.ConditionalInsert, 2894 this=self.expression( 2895 exp.Insert, 2896 this=self._parse_table(schema=True), 2897 expression=self._parse_derived_table_values(), 2898 ), 2899 expression=expression, 2900 else_=else_, 2901 ) 2902 2903 expression = parse_conditional_insert() 2904 while expression is not None: 2905 expressions.append(expression) 2906 expression = parse_conditional_insert() 2907 2908 return self.expression( 2909 exp.MultitableInserts, 2910 kind=kind, 2911 comments=comments, 2912 expressions=expressions, 2913 source=self._parse_table(), 2914 ) 2915 2916 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2917 comments = [] 2918 hint = self._parse_hint() 2919 overwrite = self._match(TokenType.OVERWRITE) 2920 ignore = self._match(TokenType.IGNORE) 2921 local = self._match_text_seq("LOCAL") 2922 alternative = None 2923 is_function = None 2924 2925 if self._match_text_seq("DIRECTORY"): 2926 this: t.Optional[exp.Expression] = self.expression( 2927 exp.Directory, 2928 this=self._parse_var_or_string(), 2929 local=local, 2930 row_format=self._parse_row_format(match_row=True), 2931 ) 2932 else: 2933 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2934 comments += ensure_list(self._prev_comments) 2935 return self._parse_multitable_inserts(comments) 2936 2937 if self._match(TokenType.OR): 2938 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2939 2940 self._match(TokenType.INTO) 2941 comments += ensure_list(self._prev_comments) 2942 self._match(TokenType.TABLE) 2943 is_function = self._match(TokenType.FUNCTION) 2944 2945 this = ( 2946 self._parse_table(schema=True, parse_partition=True) 2947 if not is_function 2948 else self._parse_function() 2949 ) 2950 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2951 this.set("alias", self._parse_table_alias()) 2952 2953 returning = self._parse_returning() 2954 2955 return self.expression( 2956 
exp.Insert, 2957 comments=comments, 2958 hint=hint, 2959 is_function=is_function, 2960 this=this, 2961 stored=self._match_text_seq("STORED") and self._parse_stored(), 2962 by_name=self._match_text_seq("BY", "NAME"), 2963 exists=self._parse_exists(), 2964 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2965 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2966 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2967 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2968 conflict=self._parse_on_conflict(), 2969 returning=returning or self._parse_returning(), 2970 overwrite=overwrite, 2971 alternative=alternative, 2972 ignore=ignore, 2973 source=self._match(TokenType.TABLE) and self._parse_table(), 2974 ) 2975 2976 def _parse_kill(self) -> exp.Kill: 2977 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2978 2979 return self.expression( 2980 exp.Kill, 2981 this=self._parse_primary(), 2982 kind=kind, 2983 ) 2984 2985 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2986 conflict = self._match_text_seq("ON", "CONFLICT") 2987 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2988 2989 if not conflict and not duplicate: 2990 return None 2991 2992 conflict_keys = None 2993 constraint = None 2994 2995 if conflict: 2996 if self._match_text_seq("ON", "CONSTRAINT"): 2997 constraint = self._parse_id_var() 2998 elif self._match(TokenType.L_PAREN): 2999 conflict_keys = self._parse_csv(self._parse_id_var) 3000 self._match_r_paren() 3001 3002 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3003 if self._prev.token_type == TokenType.UPDATE: 3004 self._match(TokenType.SET) 3005 expressions = self._parse_csv(self._parse_equality) 3006 else: 3007 expressions = None 3008 3009 return self.expression( 3010 exp.OnConflict, 3011 duplicate=duplicate, 3012 expressions=expressions, 3013 action=action, 3014 conflict_keys=conflict_keys, 3015 constraint=constraint, 3016 where=self._parse_where(), 3017 ) 3018 3019 def _parse_returning(self) -> t.Optional[exp.Returning]: 3020 if not self._match(TokenType.RETURNING): 3021 return None 3022 return self.expression( 3023 exp.Returning, 3024 expressions=self._parse_csv(self._parse_expression), 3025 into=self._match(TokenType.INTO) and self._parse_table_part(), 3026 ) 3027 3028 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3029 if not self._match(TokenType.FORMAT): 3030 return None 3031 return self._parse_row_format() 3032 3033 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3034 index = self._index 3035 with_ = with_ or self._match_text_seq("WITH") 3036 3037 if not self._match(TokenType.SERDE_PROPERTIES): 3038 self._retreat(index) 3039 return None 3040 return self.expression( 3041 exp.SerdeProperties, 3042 **{ # type: ignore 3043 "expressions": self._parse_wrapped_properties(), 3044 "with": with_, 3045 }, 3046 ) 3047 3048 def _parse_row_format( 3049 self, match_row: bool = False 3050 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3051 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3052 return None 3053 3054 if self._match_text_seq("SERDE"): 3055 this = self._parse_string() 3056 3057 serde_properties = self._parse_serde_properties() 3058 3059 return self.expression( 3060 exp.RowFormatSerdeProperty, this=this, 
serde_properties=serde_properties 3061 ) 3062 3063 self._match_text_seq("DELIMITED") 3064 3065 kwargs = {} 3066 3067 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3068 kwargs["fields"] = self._parse_string() 3069 if self._match_text_seq("ESCAPED", "BY"): 3070 kwargs["escaped"] = self._parse_string() 3071 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3072 kwargs["collection_items"] = self._parse_string() 3073 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3074 kwargs["map_keys"] = self._parse_string() 3075 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3076 kwargs["lines"] = self._parse_string() 3077 if self._match_text_seq("NULL", "DEFINED", "AS"): 3078 kwargs["null"] = self._parse_string() 3079 3080 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3081 3082 def _parse_load(self) -> exp.LoadData | exp.Command: 3083 if self._match_text_seq("DATA"): 3084 local = self._match_text_seq("LOCAL") 3085 self._match_text_seq("INPATH") 3086 inpath = self._parse_string() 3087 overwrite = self._match(TokenType.OVERWRITE) 3088 self._match_pair(TokenType.INTO, TokenType.TABLE) 3089 3090 return self.expression( 3091 exp.LoadData, 3092 this=self._parse_table(schema=True), 3093 local=local, 3094 overwrite=overwrite, 3095 inpath=inpath, 3096 partition=self._parse_partition(), 3097 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3098 serde=self._match_text_seq("SERDE") and self._parse_string(), 3099 ) 3100 return self._parse_as_command(self._prev) 3101 3102 def _parse_delete(self) -> exp.Delete: 3103 # This handles MySQL's "Multiple-Table Syntax" 3104 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3105 tables = None 3106 if not self._match(TokenType.FROM, advance=False): 3107 tables = self._parse_csv(self._parse_table) or None 3108 3109 returning = self._parse_returning() 3110 3111 return self.expression( 3112 exp.Delete, 3113 tables=tables, 3114 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3115 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3116 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3117 where=self._parse_where(), 3118 returning=returning or self._parse_returning(), 3119 limit=self._parse_limit(), 3120 ) 3121 3122 def _parse_update(self) -> exp.Update: 3123 kwargs: t.Dict[str, t.Any] = { 3124 "this": self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS), 3125 } 3126 while self._curr: 3127 if self._match(TokenType.SET): 3128 kwargs["expressions"] = self._parse_csv(self._parse_equality) 3129 elif self._match(TokenType.RETURNING, advance=False): 3130 kwargs["returning"] = self._parse_returning() 3131 elif self._match(TokenType.FROM, advance=False): 3132 kwargs["from"] = self._parse_from(joins=True) 3133 elif self._match(TokenType.WHERE, advance=False): 3134 kwargs["where"] = self._parse_where() 3135 elif self._match(TokenType.ORDER_BY, advance=False): 3136 kwargs["order"] = self._parse_order() 3137 elif self._match(TokenType.LIMIT, advance=False): 3138 kwargs["limit"] = self._parse_limit() 3139 else: 3140 break 3141 3142 return self.expression(exp.Update, **kwargs) 3143 3144 def _parse_use(self) -> exp.Use: 3145 return self.expression( 3146 exp.Use, 3147 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3148 this=self._parse_table(schema=False), 3149 ) 3150 3151 def _parse_uncache(self) -> exp.Uncache: 3152 if not self._match(TokenType.TABLE): 3153 self.raise_error("Expecting TABLE after 
UNCACHE") 3154 3155 return self.expression( 3156 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3157 ) 3158 3159 def _parse_cache(self) -> exp.Cache: 3160 lazy = self._match_text_seq("LAZY") 3161 self._match(TokenType.TABLE) 3162 table = self._parse_table(schema=True) 3163 3164 options = [] 3165 if self._match_text_seq("OPTIONS"): 3166 self._match_l_paren() 3167 k = self._parse_string() 3168 self._match(TokenType.EQ) 3169 v = self._parse_string() 3170 options = [k, v] 3171 self._match_r_paren() 3172 3173 self._match(TokenType.ALIAS) 3174 return self.expression( 3175 exp.Cache, 3176 this=table, 3177 lazy=lazy, 3178 options=options, 3179 expression=self._parse_select(nested=True), 3180 ) 3181 3182 def _parse_partition(self) -> t.Optional[exp.Partition]: 3183 if not self._match_texts(self.PARTITION_KEYWORDS): 3184 return None 3185 3186 return self.expression( 3187 exp.Partition, 3188 subpartition=self._prev.text.upper() == "SUBPARTITION", 3189 expressions=self._parse_wrapped_csv(self._parse_assignment), 3190 ) 3191 3192 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3193 def _parse_value_expression() -> t.Optional[exp.Expression]: 3194 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3195 return exp.var(self._prev.text.upper()) 3196 return self._parse_expression() 3197 3198 if self._match(TokenType.L_PAREN): 3199 expressions = self._parse_csv(_parse_value_expression) 3200 self._match_r_paren() 3201 return self.expression(exp.Tuple, expressions=expressions) 3202 3203 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3204 expression = self._parse_expression() 3205 if expression: 3206 return self.expression(exp.Tuple, expressions=[expression]) 3207 return None 3208 3209 def _parse_projections(self) -> t.List[exp.Expression]: 3210 return self._parse_expressions() 3211 3212 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3213 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3214 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3215 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3216 ) 3217 elif self._match(TokenType.FROM): 3218 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3219 # Support parentheses for duckdb FROM-first syntax 3220 select = self._parse_select(from_=from_) 3221 if select: 3222 if not select.args.get("from"): 3223 select.set("from", from_) 3224 this = select 3225 else: 3226 this = exp.select("*").from_(t.cast(exp.From, from_)) 3227 else: 3228 this = ( 3229 self._parse_table(consume_pipe=True) 3230 if table 3231 else self._parse_select(nested=True, parse_set_operation=False) 3232 ) 3233 3234 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3235 # in case a modifier (e.g. 
join) is following 3236 if table and isinstance(this, exp.Values) and this.alias: 3237 alias = this.args["alias"].pop() 3238 this = exp.Table(this=this, alias=alias) 3239 3240 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3241 3242 return this 3243 3244 def _parse_select( 3245 self, 3246 nested: bool = False, 3247 table: bool = False, 3248 parse_subquery_alias: bool = True, 3249 parse_set_operation: bool = True, 3250 consume_pipe: bool = True, 3251 from_: t.Optional[exp.From] = None, 3252 ) -> t.Optional[exp.Expression]: 3253 query = self._parse_select_query( 3254 nested=nested, 3255 table=table, 3256 parse_subquery_alias=parse_subquery_alias, 3257 parse_set_operation=parse_set_operation, 3258 ) 3259 3260 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3261 if not query and from_: 3262 query = exp.select("*").from_(from_) 3263 if isinstance(query, exp.Query): 3264 query = self._parse_pipe_syntax_query(query) 3265 query = query.subquery(copy=False) if query and table else query 3266 3267 return query 3268 3269 def _parse_select_query( 3270 self, 3271 nested: bool = False, 3272 table: bool = False, 3273 parse_subquery_alias: bool = True, 3274 parse_set_operation: bool = True, 3275 ) -> t.Optional[exp.Expression]: 3276 cte = self._parse_with() 3277 3278 if cte: 3279 this = self._parse_statement() 3280 3281 if not this: 3282 self.raise_error("Failed to parse any statement following CTE") 3283 return cte 3284 3285 if "with" in this.arg_types: 3286 this.set("with", cte) 3287 else: 3288 self.raise_error(f"{this.key} does not support CTE") 3289 this = cte 3290 3291 return this 3292 3293 # duckdb supports leading with FROM x 3294 from_ = ( 3295 self._parse_from(consume_pipe=True) 3296 if self._match(TokenType.FROM, advance=False) 3297 else None 3298 ) 3299 3300 if self._match(TokenType.SELECT): 3301 comments = self._prev_comments 3302 3303 hint = self._parse_hint() 3304 3305 if self._next and not self._next.token_type == TokenType.DOT: 3306 all_ = self._match(TokenType.ALL) 3307 distinct = self._match_set(self.DISTINCT_TOKENS) 3308 else: 3309 all_, distinct = None, None 3310 3311 kind = ( 3312 self._match(TokenType.ALIAS) 3313 and self._match_texts(("STRUCT", "VALUE")) 3314 and self._prev.text.upper() 3315 ) 3316 3317 if distinct: 3318 distinct = self.expression( 3319 exp.Distinct, 3320 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3321 ) 3322 3323 if all_ and distinct: 3324 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3325 3326 operation_modifiers = [] 3327 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3328 operation_modifiers.append(exp.var(self._prev.text.upper())) 3329 3330 limit = self._parse_limit(top=True) 3331 projections = self._parse_projections() 3332 3333 this = self.expression( 3334 exp.Select, 3335 kind=kind, 3336 hint=hint, 3337 distinct=distinct, 3338 expressions=projections, 3339 limit=limit, 3340 operation_modifiers=operation_modifiers or None, 3341 ) 3342 this.comments = comments 3343 3344 into = self._parse_into() 3345 if into: 3346 this.set("into", into) 3347 3348 if not from_: 3349 from_ = self._parse_from() 3350 3351 if from_: 3352 this.set("from", from_) 3353 3354 this = self._parse_query_modifiers(this) 3355 elif (table or nested) and self._match(TokenType.L_PAREN): 3356 this = self._parse_wrapped_select(table=table) 3357 3358 # We return early here so that the UNION isn't attached to the subquery by the 3359 # following call to _parse_set_operations, but 
instead becomes the parent node 3360 self._match_r_paren() 3361 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3362 elif self._match(TokenType.VALUES, advance=False): 3363 this = self._parse_derived_table_values() 3364 elif from_: 3365 this = exp.select("*").from_(from_.this, copy=False) 3366 elif self._match(TokenType.SUMMARIZE): 3367 table = self._match(TokenType.TABLE) 3368 this = self._parse_select() or self._parse_string() or self._parse_table() 3369 return self.expression(exp.Summarize, this=this, table=table) 3370 elif self._match(TokenType.DESCRIBE): 3371 this = self._parse_describe() 3372 elif self._match_text_seq("STREAM"): 3373 this = self._parse_function() 3374 if this: 3375 this = self.expression(exp.Stream, this=this) 3376 else: 3377 self._retreat(self._index - 1) 3378 else: 3379 this = None 3380 3381 return self._parse_set_operations(this) if parse_set_operation else this 3382 3383 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3384 self._match_text_seq("SEARCH") 3385 3386 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3387 3388 if not kind: 3389 return None 3390 3391 self._match_text_seq("FIRST", "BY") 3392 3393 return self.expression( 3394 exp.RecursiveWithSearch, 3395 kind=kind, 3396 this=self._parse_id_var(), 3397 expression=self._match_text_seq("SET") and self._parse_id_var(), 3398 using=self._match_text_seq("USING") and self._parse_id_var(), 3399 ) 3400 3401 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3402 if not skip_with_token and not self._match(TokenType.WITH): 3403 return None 3404 3405 comments = self._prev_comments 3406 recursive = self._match(TokenType.RECURSIVE) 3407 3408 last_comments = None 3409 expressions = [] 3410 while True: 3411 cte = self._parse_cte() 3412 if isinstance(cte, exp.CTE): 3413 expressions.append(cte) 3414 if last_comments: 3415 cte.add_comments(last_comments) 3416 3417 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3418 break 3419 else: 3420 self._match(TokenType.WITH) 3421 3422 last_comments = self._prev_comments 3423 3424 return self.expression( 3425 exp.With, 3426 comments=comments, 3427 expressions=expressions, 3428 recursive=recursive, 3429 search=self._parse_recursive_with_search(), 3430 ) 3431 3432 def _parse_cte(self) -> t.Optional[exp.CTE]: 3433 index = self._index 3434 3435 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3436 if not alias or not alias.this: 3437 self.raise_error("Expected CTE to have alias") 3438 3439 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3440 self._retreat(index) 3441 return None 3442 3443 comments = self._prev_comments 3444 3445 if self._match_text_seq("NOT", "MATERIALIZED"): 3446 materialized = False 3447 elif self._match_text_seq("MATERIALIZED"): 3448 materialized = True 3449 else: 3450 materialized = None 3451 3452 cte = self.expression( 3453 exp.CTE, 3454 this=self._parse_wrapped(self._parse_statement), 3455 alias=alias, 3456 materialized=materialized, 3457 comments=comments, 3458 ) 3459 3460 values = cte.this 3461 if isinstance(values, exp.Values): 3462 if values.alias: 3463 cte.set("this", exp.select("*").from_(values)) 3464 else: 3465 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3466 3467 return cte 3468 3469 def _parse_table_alias( 3470 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3471 ) -> t.Optional[exp.TableAlias]: 3472 # In some dialects, LIMIT and OFFSET 
can act as both identifiers and keywords (clauses) 3473 # so this section tries to parse the clause version and if it fails, it treats the token 3474 # as an identifier (alias) 3475 if self._can_parse_limit_or_offset(): 3476 return None 3477 3478 any_token = self._match(TokenType.ALIAS) 3479 alias = ( 3480 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3481 or self._parse_string_as_identifier() 3482 ) 3483 3484 index = self._index 3485 if self._match(TokenType.L_PAREN): 3486 columns = self._parse_csv(self._parse_function_parameter) 3487 self._match_r_paren() if columns else self._retreat(index) 3488 else: 3489 columns = None 3490 3491 if not alias and not columns: 3492 return None 3493 3494 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3495 3496 # We bubble up comments from the Identifier to the TableAlias 3497 if isinstance(alias, exp.Identifier): 3498 table_alias.add_comments(alias.pop_comments()) 3499 3500 return table_alias 3501 3502 def _parse_subquery( 3503 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3504 ) -> t.Optional[exp.Subquery]: 3505 if not this: 3506 return None 3507 3508 return self.expression( 3509 exp.Subquery, 3510 this=this, 3511 pivots=self._parse_pivots(), 3512 alias=self._parse_table_alias() if parse_alias else None, 3513 sample=self._parse_table_sample(), 3514 ) 3515 3516 def _implicit_unnests_to_explicit(self, this: E) -> E: 3517 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3518 3519 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3520 for i, join in enumerate(this.args.get("joins") or []): 3521 table = join.this 3522 normalized_table = table.copy() 3523 normalized_table.meta["maybe_column"] = True 3524 normalized_table = _norm(normalized_table, dialect=self.dialect) 3525 3526 if isinstance(table, exp.Table) and not join.args.get("on"): 3527 if normalized_table.parts[0].name in refs: 3528 table_as_column = table.to_column() 3529 unnest = exp.Unnest(expressions=[table_as_column]) 3530 3531 # Table.to_column creates a parent Alias node that we want to convert to 3532 # a TableAlias and attach to the Unnest, so it matches the parser's output 3533 if isinstance(table.args.get("alias"), exp.TableAlias): 3534 table_as_column.replace(table_as_column.this) 3535 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3536 3537 table.replace(unnest) 3538 3539 refs.add(normalized_table.alias_or_name) 3540 3541 return this 3542 3543 def _parse_query_modifiers( 3544 self, this: t.Optional[exp.Expression] 3545 ) -> t.Optional[exp.Expression]: 3546 if isinstance(this, self.MODIFIABLES): 3547 for join in self._parse_joins(): 3548 this.append("joins", join) 3549 for lateral in iter(self._parse_lateral, None): 3550 this.append("laterals", lateral) 3551 3552 while True: 3553 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3554 modifier_token = self._curr 3555 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3556 key, expression = parser(self) 3557 3558 if expression: 3559 if this.args.get(key): 3560 self.raise_error( 3561 f"Found multiple '{modifier_token.text.upper()}' clauses", 3562 token=modifier_token, 3563 ) 3564 3565 this.set(key, expression) 3566 if key == "limit": 3567 offset = expression.args.pop("offset", None) 3568 3569 if offset: 3570 offset = exp.Offset(expression=offset) 3571 this.set("offset", offset) 3572 3573 limit_by_expressions = expression.expressions 3574 
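# [Editor's note: illustrative sketch, not part of the original sqlglot source.]
# This branch of _parse_query_modifiers normalizes a combined LIMIT clause: an offset
# embedded in the exp.Limit is promoted to a standalone exp.Offset on the query, and
# any ClickHouse-style LIMIT ... BY expressions are carried over to that Offset.
# A hedged example of the observable effect (SQL text and dialects are mine):
#
#     import sqlglot
#
#     q = sqlglot.parse_one("SELECT * FROM t LIMIT 1, 2", read="mysql")
#     q.args["limit"]    # exp.Limit for the row count (2)
#     q.args["offset"]   # exp.Offset for the skipped rows (1)
#     q.sql(dialect="postgres")   # roughly: SELECT * FROM t LIMIT 2 OFFSET 1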
expression.set("expressions", None) 3575 offset.set("expressions", limit_by_expressions) 3576 continue 3577 break 3578 3579 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3580 this = self._implicit_unnests_to_explicit(this) 3581 3582 return this 3583 3584 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3585 start = self._curr 3586 while self._curr: 3587 self._advance() 3588 3589 end = self._tokens[self._index - 1] 3590 return exp.Hint(expressions=[self._find_sql(start, end)]) 3591 3592 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3593 return self._parse_function_call() 3594 3595 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3596 start_index = self._index 3597 should_fallback_to_string = False 3598 3599 hints = [] 3600 try: 3601 for hint in iter( 3602 lambda: self._parse_csv( 3603 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3604 ), 3605 [], 3606 ): 3607 hints.extend(hint) 3608 except ParseError: 3609 should_fallback_to_string = True 3610 3611 if should_fallback_to_string or self._curr: 3612 self._retreat(start_index) 3613 return self._parse_hint_fallback_to_string() 3614 3615 return self.expression(exp.Hint, expressions=hints) 3616 3617 def _parse_hint(self) -> t.Optional[exp.Hint]: 3618 if self._match(TokenType.HINT) and self._prev_comments: 3619 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3620 3621 return None 3622 3623 def _parse_into(self) -> t.Optional[exp.Into]: 3624 if not self._match(TokenType.INTO): 3625 return None 3626 3627 temp = self._match(TokenType.TEMPORARY) 3628 unlogged = self._match_text_seq("UNLOGGED") 3629 self._match(TokenType.TABLE) 3630 3631 return self.expression( 3632 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3633 ) 3634 3635 def _parse_from( 3636 self, 3637 joins: bool = False, 3638 skip_from_token: bool = False, 3639 consume_pipe: bool = False, 3640 ) -> t.Optional[exp.From]: 3641 if not skip_from_token and not self._match(TokenType.FROM): 3642 return None 3643 3644 return self.expression( 3645 exp.From, 3646 comments=self._prev_comments, 3647 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3648 ) 3649 3650 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3651 return self.expression( 3652 exp.MatchRecognizeMeasure, 3653 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3654 this=self._parse_expression(), 3655 ) 3656 3657 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3658 if not self._match(TokenType.MATCH_RECOGNIZE): 3659 return None 3660 3661 self._match_l_paren() 3662 3663 partition = self._parse_partition_by() 3664 order = self._parse_order() 3665 3666 measures = ( 3667 self._parse_csv(self._parse_match_recognize_measure) 3668 if self._match_text_seq("MEASURES") 3669 else None 3670 ) 3671 3672 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3673 rows = exp.var("ONE ROW PER MATCH") 3674 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3675 text = "ALL ROWS PER MATCH" 3676 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3677 text += " SHOW EMPTY MATCHES" 3678 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3679 text += " OMIT EMPTY MATCHES" 3680 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3681 text += " WITH UNMATCHED ROWS" 3682 rows = exp.var(text) 3683 else: 3684 rows = None 3685 3686 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3687 text = "AFTER 
MATCH SKIP" 3688 if self._match_text_seq("PAST", "LAST", "ROW"): 3689 text += " PAST LAST ROW" 3690 elif self._match_text_seq("TO", "NEXT", "ROW"): 3691 text += " TO NEXT ROW" 3692 elif self._match_text_seq("TO", "FIRST"): 3693 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3694 elif self._match_text_seq("TO", "LAST"): 3695 text += f" TO LAST {self._advance_any().text}" # type: ignore 3696 after = exp.var(text) 3697 else: 3698 after = None 3699 3700 if self._match_text_seq("PATTERN"): 3701 self._match_l_paren() 3702 3703 if not self._curr: 3704 self.raise_error("Expecting )", self._curr) 3705 3706 paren = 1 3707 start = self._curr 3708 3709 while self._curr and paren > 0: 3710 if self._curr.token_type == TokenType.L_PAREN: 3711 paren += 1 3712 if self._curr.token_type == TokenType.R_PAREN: 3713 paren -= 1 3714 3715 end = self._prev 3716 self._advance() 3717 3718 if paren > 0: 3719 self.raise_error("Expecting )", self._curr) 3720 3721 pattern = exp.var(self._find_sql(start, end)) 3722 else: 3723 pattern = None 3724 3725 define = ( 3726 self._parse_csv(self._parse_name_as_expression) 3727 if self._match_text_seq("DEFINE") 3728 else None 3729 ) 3730 3731 self._match_r_paren() 3732 3733 return self.expression( 3734 exp.MatchRecognize, 3735 partition_by=partition, 3736 order=order, 3737 measures=measures, 3738 rows=rows, 3739 after=after, 3740 pattern=pattern, 3741 define=define, 3742 alias=self._parse_table_alias(), 3743 ) 3744 3745 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3746 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3747 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3748 cross_apply = False 3749 3750 if cross_apply is not None: 3751 this = self._parse_select(table=True) 3752 view = None 3753 outer = None 3754 elif self._match(TokenType.LATERAL): 3755 this = self._parse_select(table=True) 3756 view = self._match(TokenType.VIEW) 3757 outer = self._match(TokenType.OUTER) 3758 else: 3759 return None 3760 3761 if not this: 3762 this = ( 3763 self._parse_unnest() 3764 or self._parse_function() 3765 or self._parse_id_var(any_token=False) 3766 ) 3767 3768 while self._match(TokenType.DOT): 3769 this = exp.Dot( 3770 this=this, 3771 expression=self._parse_function() or self._parse_id_var(any_token=False), 3772 ) 3773 3774 ordinality: t.Optional[bool] = None 3775 3776 if view: 3777 table = self._parse_id_var(any_token=False) 3778 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3779 table_alias: t.Optional[exp.TableAlias] = self.expression( 3780 exp.TableAlias, this=table, columns=columns 3781 ) 3782 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3783 # We move the alias from the lateral's child node to the lateral itself 3784 table_alias = this.args["alias"].pop() 3785 else: 3786 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3787 table_alias = self._parse_table_alias() 3788 3789 return self.expression( 3790 exp.Lateral, 3791 this=this, 3792 view=view, 3793 outer=outer, 3794 alias=table_alias, 3795 cross_apply=cross_apply, 3796 ordinality=ordinality, 3797 ) 3798 3799 def _parse_join_parts( 3800 self, 3801 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3802 return ( 3803 self._match_set(self.JOIN_METHODS) and self._prev, 3804 self._match_set(self.JOIN_SIDES) and self._prev, 3805 self._match_set(self.JOIN_KINDS) and self._prev, 3806 ) 3807 3808 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3809 def 
_parse_column_as_identifier() -> t.Optional[exp.Expression]: 3810 this = self._parse_column() 3811 if isinstance(this, exp.Column): 3812 return this.this 3813 return this 3814 3815 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3816 3817 def _parse_join( 3818 self, skip_join_token: bool = False, parse_bracket: bool = False 3819 ) -> t.Optional[exp.Join]: 3820 if self._match(TokenType.COMMA): 3821 table = self._try_parse(self._parse_table) 3822 cross_join = self.expression(exp.Join, this=table) if table else None 3823 3824 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3825 cross_join.set("kind", "CROSS") 3826 3827 return cross_join 3828 3829 index = self._index 3830 method, side, kind = self._parse_join_parts() 3831 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3832 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3833 join_comments = self._prev_comments 3834 3835 if not skip_join_token and not join: 3836 self._retreat(index) 3837 kind = None 3838 method = None 3839 side = None 3840 3841 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3842 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3843 3844 if not skip_join_token and not join and not outer_apply and not cross_apply: 3845 return None 3846 3847 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3848 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3849 kwargs["expressions"] = self._parse_csv( 3850 lambda: self._parse_table(parse_bracket=parse_bracket) 3851 ) 3852 3853 if method: 3854 kwargs["method"] = method.text 3855 if side: 3856 kwargs["side"] = side.text 3857 if kind: 3858 kwargs["kind"] = kind.text 3859 if hint: 3860 kwargs["hint"] = hint 3861 3862 if self._match(TokenType.MATCH_CONDITION): 3863 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3864 3865 if self._match(TokenType.ON): 3866 kwargs["on"] = self._parse_assignment() 3867 elif self._match(TokenType.USING): 3868 kwargs["using"] = self._parse_using_identifiers() 3869 elif ( 3870 not method 3871 and not (outer_apply or cross_apply) 3872 and not isinstance(kwargs["this"], exp.Unnest) 3873 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3874 ): 3875 index = self._index 3876 joins: t.Optional[list] = list(self._parse_joins()) 3877 3878 if joins and self._match(TokenType.ON): 3879 kwargs["on"] = self._parse_assignment() 3880 elif joins and self._match(TokenType.USING): 3881 kwargs["using"] = self._parse_using_identifiers() 3882 else: 3883 joins = None 3884 self._retreat(index) 3885 3886 kwargs["this"].set("joins", joins if joins else None) 3887 3888 kwargs["pivots"] = self._parse_pivots() 3889 3890 comments = [c for token in (method, side, kind) if token for c in token.comments] 3891 comments = (join_comments or []) + comments 3892 3893 if ( 3894 self.ADD_JOIN_ON_TRUE 3895 and not kwargs.get("on") 3896 and not kwargs.get("using") 3897 and not kwargs.get("method") 3898 and kwargs.get("kind") in (None, "INNER", "OUTER") 3899 ): 3900 kwargs["on"] = exp.true() 3901 3902 return self.expression(exp.Join, comments=comments, **kwargs) 3903 3904 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3905 this = self._parse_assignment() 3906 3907 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3908 return this 3909 3910 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3911 return 
self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3912 3913 return this 3914 3915 def _parse_index_params(self) -> exp.IndexParameters: 3916 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3917 3918 if self._match(TokenType.L_PAREN, advance=False): 3919 columns = self._parse_wrapped_csv(self._parse_with_operator) 3920 else: 3921 columns = None 3922 3923 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3924 partition_by = self._parse_partition_by() 3925 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3926 tablespace = ( 3927 self._parse_var(any_token=True) 3928 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3929 else None 3930 ) 3931 where = self._parse_where() 3932 3933 on = self._parse_field() if self._match(TokenType.ON) else None 3934 3935 return self.expression( 3936 exp.IndexParameters, 3937 using=using, 3938 columns=columns, 3939 include=include, 3940 partition_by=partition_by, 3941 where=where, 3942 with_storage=with_storage, 3943 tablespace=tablespace, 3944 on=on, 3945 ) 3946 3947 def _parse_index( 3948 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3949 ) -> t.Optional[exp.Index]: 3950 if index or anonymous: 3951 unique = None 3952 primary = None 3953 amp = None 3954 3955 self._match(TokenType.ON) 3956 self._match(TokenType.TABLE) # hive 3957 table = self._parse_table_parts(schema=True) 3958 else: 3959 unique = self._match(TokenType.UNIQUE) 3960 primary = self._match_text_seq("PRIMARY") 3961 amp = self._match_text_seq("AMP") 3962 3963 if not self._match(TokenType.INDEX): 3964 return None 3965 3966 index = self._parse_id_var() 3967 table = None 3968 3969 params = self._parse_index_params() 3970 3971 return self.expression( 3972 exp.Index, 3973 this=index, 3974 table=table, 3975 unique=unique, 3976 primary=primary, 3977 amp=amp, 3978 params=params, 3979 ) 3980 3981 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3982 hints: t.List[exp.Expression] = [] 3983 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3984 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3985 hints.append( 3986 self.expression( 3987 exp.WithTableHint, 3988 expressions=self._parse_csv( 3989 lambda: self._parse_function() or self._parse_var(any_token=True) 3990 ), 3991 ) 3992 ) 3993 self._match_r_paren() 3994 else: 3995 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3996 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3997 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3998 3999 self._match_set((TokenType.INDEX, TokenType.KEY)) 4000 if self._match(TokenType.FOR): 4001 hint.set("target", self._advance_any() and self._prev.text.upper()) 4002 4003 hint.set("expressions", self._parse_wrapped_id_vars()) 4004 hints.append(hint) 4005 4006 return hints or None 4007 4008 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 4009 return ( 4010 (not schema and self._parse_function(optional_parens=False)) 4011 or self._parse_id_var(any_token=False) 4012 or self._parse_string_as_identifier() 4013 or self._parse_placeholder() 4014 ) 4015 4016 def _parse_table_parts( 4017 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 4018 ) -> exp.Table: 4019 catalog = None 4020 db = None 4021 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 4022 4023 while self._match(TokenType.DOT): 4024 
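# [Editor's note: illustrative sketch, not part of the original sqlglot source.]
# This while loop in _parse_table_parts walks DOT-separated name parts, promoting them
# to db and catalog in turn and nesting any further parts into exp.Dot expressions.
# A hedged example of the result (SQL text is mine):
#
#     import sqlglot
#     from sqlglot import exp
#
#     tbl = sqlglot.parse_one("SELECT * FROM prod.analytics.events").find(exp.Table)
#     tbl.catalog, tbl.db, tbl.name   # ('prod', 'analytics', 'events')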
if catalog: 4025 # This allows nesting the table in arbitrarily many dot expressions if needed 4026 table = self.expression( 4027 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4028 ) 4029 else: 4030 catalog = db 4031 db = table 4032 # "" used for tsql FROM a..b case 4033 table = self._parse_table_part(schema=schema) or "" 4034 4035 if ( 4036 wildcard 4037 and self._is_connected() 4038 and (isinstance(table, exp.Identifier) or not table) 4039 and self._match(TokenType.STAR) 4040 ): 4041 if isinstance(table, exp.Identifier): 4042 table.args["this"] += "*" 4043 else: 4044 table = exp.Identifier(this="*") 4045 4046 # We bubble up comments from the Identifier to the Table 4047 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4048 4049 if is_db_reference: 4050 catalog = db 4051 db = table 4052 table = None 4053 4054 if not table and not is_db_reference: 4055 self.raise_error(f"Expected table name but got {self._curr}") 4056 if not db and is_db_reference: 4057 self.raise_error(f"Expected database name but got {self._curr}") 4058 4059 table = self.expression( 4060 exp.Table, 4061 comments=comments, 4062 this=table, 4063 db=db, 4064 catalog=catalog, 4065 ) 4066 4067 changes = self._parse_changes() 4068 if changes: 4069 table.set("changes", changes) 4070 4071 at_before = self._parse_historical_data() 4072 if at_before: 4073 table.set("when", at_before) 4074 4075 pivots = self._parse_pivots() 4076 if pivots: 4077 table.set("pivots", pivots) 4078 4079 return table 4080 4081 def _parse_table( 4082 self, 4083 schema: bool = False, 4084 joins: bool = False, 4085 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4086 parse_bracket: bool = False, 4087 is_db_reference: bool = False, 4088 parse_partition: bool = False, 4089 consume_pipe: bool = False, 4090 ) -> t.Optional[exp.Expression]: 4091 lateral = self._parse_lateral() 4092 if lateral: 4093 return lateral 4094 4095 unnest = self._parse_unnest() 4096 if unnest: 4097 return unnest 4098 4099 values = self._parse_derived_table_values() 4100 if values: 4101 return values 4102 4103 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4104 if subquery: 4105 if not subquery.args.get("pivots"): 4106 subquery.set("pivots", self._parse_pivots()) 4107 return subquery 4108 4109 bracket = parse_bracket and self._parse_bracket(None) 4110 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4111 4112 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4113 self._parse_table 4114 ) 4115 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4116 4117 only = self._match(TokenType.ONLY) 4118 4119 this = t.cast( 4120 exp.Expression, 4121 bracket 4122 or rows_from 4123 or self._parse_bracket( 4124 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4125 ), 4126 ) 4127 4128 if only: 4129 this.set("only", only) 4130 4131 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4132 self._match_text_seq("*") 4133 4134 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4135 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4136 this.set("partition", self._parse_partition()) 4137 4138 if schema: 4139 return self._parse_schema(this=this) 4140 4141 version = self._parse_version() 4142 4143 if version: 4144 this.set("version", version) 4145 4146 if self.dialect.ALIAS_POST_TABLESAMPLE: 4147 this.set("sample", self._parse_table_sample()) 4148 
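# [Editor's note: illustrative sketch, not part of the original sqlglot source.]
# _parse_table stitches together the optional pieces that may follow a table name:
# partition selection, a version/temporal clause (_parse_version), samples, aliases,
# hints and pivots. A hedged example of the version clause (SQL text and dialect
# choice are mine; dialect coverage may vary):
#
#     import sqlglot
#     from sqlglot import exp
#
#     tbl = sqlglot.parse_one(
#         "SELECT * FROM orders FOR SYSTEM_TIME AS OF '2024-01-01 00:00:00'",
#         read="bigquery",
#     ).find(exp.Table)
#     tbl.args.get("version")   # exp.Version(this='TIMESTAMP', kind='AS OF', ...)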
4149 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4150 if alias: 4151 this.set("alias", alias) 4152 4153 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4154 return self.expression( 4155 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4156 ) 4157 4158 this.set("hints", self._parse_table_hints()) 4159 4160 if not this.args.get("pivots"): 4161 this.set("pivots", self._parse_pivots()) 4162 4163 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4164 this.set("sample", self._parse_table_sample()) 4165 4166 if joins: 4167 for join in self._parse_joins(): 4168 this.append("joins", join) 4169 4170 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4171 this.set("ordinality", True) 4172 this.set("alias", self._parse_table_alias()) 4173 4174 return this 4175 4176 def _parse_version(self) -> t.Optional[exp.Version]: 4177 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4178 this = "TIMESTAMP" 4179 elif self._match(TokenType.VERSION_SNAPSHOT): 4180 this = "VERSION" 4181 else: 4182 return None 4183 4184 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4185 kind = self._prev.text.upper() 4186 start = self._parse_bitwise() 4187 self._match_texts(("TO", "AND")) 4188 end = self._parse_bitwise() 4189 expression: t.Optional[exp.Expression] = self.expression( 4190 exp.Tuple, expressions=[start, end] 4191 ) 4192 elif self._match_text_seq("CONTAINED", "IN"): 4193 kind = "CONTAINED IN" 4194 expression = self.expression( 4195 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4196 ) 4197 elif self._match(TokenType.ALL): 4198 kind = "ALL" 4199 expression = None 4200 else: 4201 self._match_text_seq("AS", "OF") 4202 kind = "AS OF" 4203 expression = self._parse_type() 4204 4205 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4206 4207 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4208 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4209 index = self._index 4210 historical_data = None 4211 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4212 this = self._prev.text.upper() 4213 kind = ( 4214 self._match(TokenType.L_PAREN) 4215 and self._match_texts(self.HISTORICAL_DATA_KIND) 4216 and self._prev.text.upper() 4217 ) 4218 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4219 4220 if expression: 4221 self._match_r_paren() 4222 historical_data = self.expression( 4223 exp.HistoricalData, this=this, kind=kind, expression=expression 4224 ) 4225 else: 4226 self._retreat(index) 4227 4228 return historical_data 4229 4230 def _parse_changes(self) -> t.Optional[exp.Changes]: 4231 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4232 return None 4233 4234 information = self._parse_var(any_token=True) 4235 self._match_r_paren() 4236 4237 return self.expression( 4238 exp.Changes, 4239 information=information, 4240 at_before=self._parse_historical_data(), 4241 end=self._parse_historical_data(), 4242 ) 4243 4244 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4245 if not self._match_pair(TokenType.UNNEST, TokenType.L_PAREN, advance=False): 4246 return None 4247 4248 self._advance() 4249 4250 expressions = self._parse_wrapped_csv(self._parse_equality) 4251 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4252 4253 alias = self._parse_table_alias() if with_alias else None 4254 4255 if alias: 4256 if self.dialect.UNNEST_COLUMN_ONLY: 4257 if alias.args.get("columns"): 4258 
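# [Editor's note: illustrative sketch, not part of the original sqlglot source.]
# _parse_unnest, which wraps this alias handling, parses UNNEST(...) table expressions,
# including column-only aliasing (UNNEST_COLUMN_ONLY) and the optional WITH OFFSET
# ordinal column. A hedged example (SQL text and dialect choice are mine):
#
#     import sqlglot
#     from sqlglot import exp
#
#     u = sqlglot.parse_one(
#         "SELECT x, pos FROM UNNEST([1, 2, 3]) AS x WITH OFFSET AS pos",
#         read="bigquery",
#     ).find(exp.Unnest)
#     u.args.get("alias")    # TableAlias whose columns hold "x" (UNNEST_COLUMN_ONLY)
#     u.args.get("offset")   # identifier "pos"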
self.raise_error("Unexpected extra column alias in unnest.") 4259 4260 alias.set("columns", [alias.this]) 4261 alias.set("this", None) 4262 4263 columns = alias.args.get("columns") or [] 4264 if offset and len(expressions) < len(columns): 4265 offset = columns.pop() 4266 4267 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4268 self._match(TokenType.ALIAS) 4269 offset = self._parse_id_var( 4270 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4271 ) or exp.to_identifier("offset") 4272 4273 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4274 4275 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4276 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4277 if not is_derived and not ( 4278 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4279 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4280 ): 4281 return None 4282 4283 expressions = self._parse_csv(self._parse_value) 4284 alias = self._parse_table_alias() 4285 4286 if is_derived: 4287 self._match_r_paren() 4288 4289 return self.expression( 4290 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4291 ) 4292 4293 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4294 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4295 as_modifier and self._match_text_seq("USING", "SAMPLE") 4296 ): 4297 return None 4298 4299 bucket_numerator = None 4300 bucket_denominator = None 4301 bucket_field = None 4302 percent = None 4303 size = None 4304 seed = None 4305 4306 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4307 matched_l_paren = self._match(TokenType.L_PAREN) 4308 4309 if self.TABLESAMPLE_CSV: 4310 num = None 4311 expressions = self._parse_csv(self._parse_primary) 4312 else: 4313 expressions = None 4314 num = ( 4315 self._parse_factor() 4316 if self._match(TokenType.NUMBER, advance=False) 4317 else self._parse_primary() or self._parse_placeholder() 4318 ) 4319 4320 if self._match_text_seq("BUCKET"): 4321 bucket_numerator = self._parse_number() 4322 self._match_text_seq("OUT", "OF") 4323 bucket_denominator = bucket_denominator = self._parse_number() 4324 self._match(TokenType.ON) 4325 bucket_field = self._parse_field() 4326 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4327 percent = num 4328 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4329 size = num 4330 else: 4331 percent = num 4332 4333 if matched_l_paren: 4334 self._match_r_paren() 4335 4336 if self._match(TokenType.L_PAREN): 4337 method = self._parse_var(upper=True) 4338 seed = self._match(TokenType.COMMA) and self._parse_number() 4339 self._match_r_paren() 4340 elif self._match_texts(("SEED", "REPEATABLE")): 4341 seed = self._parse_wrapped(self._parse_number) 4342 4343 if not method and self.DEFAULT_SAMPLING_METHOD: 4344 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4345 4346 return self.expression( 4347 exp.TableSample, 4348 expressions=expressions, 4349 method=method, 4350 bucket_numerator=bucket_numerator, 4351 bucket_denominator=bucket_denominator, 4352 bucket_field=bucket_field, 4353 percent=percent, 4354 size=size, 4355 seed=seed, 4356 ) 4357 4358 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4359 return list(iter(self._parse_pivot, None)) or None 4360 4361 def _parse_joins(self) -> t.Iterator[exp.Join]: 4362 return iter(self._parse_join, None) 4363 4364 def _parse_unpivot_columns(self) -> 
t.Optional[exp.UnpivotColumns]: 4365 if not self._match(TokenType.INTO): 4366 return None 4367 4368 return self.expression( 4369 exp.UnpivotColumns, 4370 this=self._match_text_seq("NAME") and self._parse_column(), 4371 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4372 ) 4373 4374 # https://duckdb.org/docs/sql/statements/pivot 4375 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4376 def _parse_on() -> t.Optional[exp.Expression]: 4377 this = self._parse_bitwise() 4378 4379 if self._match(TokenType.IN): 4380 # PIVOT ... ON col IN (row_val1, row_val2) 4381 return self._parse_in(this) 4382 if self._match(TokenType.ALIAS, advance=False): 4383 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4384 return self._parse_alias(this) 4385 4386 return this 4387 4388 this = self._parse_table() 4389 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4390 into = self._parse_unpivot_columns() 4391 using = self._match(TokenType.USING) and self._parse_csv( 4392 lambda: self._parse_alias(self._parse_function()) 4393 ) 4394 group = self._parse_group() 4395 4396 return self.expression( 4397 exp.Pivot, 4398 this=this, 4399 expressions=expressions, 4400 using=using, 4401 group=group, 4402 unpivot=is_unpivot, 4403 into=into, 4404 ) 4405 4406 def _parse_pivot_in(self) -> exp.In: 4407 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4408 this = self._parse_select_or_expression() 4409 4410 self._match(TokenType.ALIAS) 4411 alias = self._parse_bitwise() 4412 if alias: 4413 if isinstance(alias, exp.Column) and not alias.db: 4414 alias = alias.this 4415 return self.expression(exp.PivotAlias, this=this, alias=alias) 4416 4417 return this 4418 4419 value = self._parse_column() 4420 4421 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4422 self.raise_error("Expecting IN (") 4423 4424 if self._match(TokenType.ANY): 4425 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4426 else: 4427 exprs = self._parse_csv(_parse_aliased_expression) 4428 4429 self._match_r_paren() 4430 return self.expression(exp.In, this=value, expressions=exprs) 4431 4432 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4433 func = self._parse_function() 4434 if not func: 4435 if self._prev and self._prev.token_type == TokenType.COMMA: 4436 return None 4437 self.raise_error("Expecting an aggregation function in PIVOT") 4438 4439 return self._parse_alias(func) 4440 4441 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4442 index = self._index 4443 include_nulls = None 4444 4445 if self._match(TokenType.PIVOT): 4446 unpivot = False 4447 elif self._match(TokenType.UNPIVOT): 4448 unpivot = True 4449 4450 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4451 if self._match_text_seq("INCLUDE", "NULLS"): 4452 include_nulls = True 4453 elif self._match_text_seq("EXCLUDE", "NULLS"): 4454 include_nulls = False 4455 else: 4456 return None 4457 4458 expressions = [] 4459 4460 if not self._match(TokenType.L_PAREN): 4461 self._retreat(index) 4462 return None 4463 4464 if unpivot: 4465 expressions = self._parse_csv(self._parse_column) 4466 else: 4467 expressions = self._parse_csv(self._parse_pivot_aggregation) 4468 4469 if not expressions: 4470 self.raise_error("Failed to parse PIVOT's aggregation list") 4471 4472 if not self._match(TokenType.FOR): 4473 self.raise_error("Expecting FOR") 4474 4475 fields = [] 4476 while True: 4477 field = 
self._try_parse(self._parse_pivot_in) 4478 if not field: 4479 break 4480 fields.append(field) 4481 4482 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4483 self._parse_bitwise 4484 ) 4485 4486 group = self._parse_group() 4487 4488 self._match_r_paren() 4489 4490 pivot = self.expression( 4491 exp.Pivot, 4492 expressions=expressions, 4493 fields=fields, 4494 unpivot=unpivot, 4495 include_nulls=include_nulls, 4496 default_on_null=default_on_null, 4497 group=group, 4498 ) 4499 4500 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4501 pivot.set("alias", self._parse_table_alias()) 4502 4503 if not unpivot: 4504 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4505 4506 columns: t.List[exp.Expression] = [] 4507 all_fields = [] 4508 for pivot_field in pivot.fields: 4509 pivot_field_expressions = pivot_field.expressions 4510 4511 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4512 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4513 continue 4514 4515 all_fields.append( 4516 [ 4517 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4518 for fld in pivot_field_expressions 4519 ] 4520 ) 4521 4522 if all_fields: 4523 if names: 4524 all_fields.append(names) 4525 4526 # Generate all possible combinations of the pivot columns 4527 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4528 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4529 for fld_parts_tuple in itertools.product(*all_fields): 4530 fld_parts = list(fld_parts_tuple) 4531 4532 if names and self.PREFIXED_PIVOT_COLUMNS: 4533 # Move the "name" to the front of the list 4534 fld_parts.insert(0, fld_parts.pop(-1)) 4535 4536 columns.append(exp.to_identifier("_".join(fld_parts))) 4537 4538 pivot.set("columns", columns) 4539 4540 return pivot 4541 4542 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4543 return [agg.alias for agg in aggregations if agg.alias] 4544 4545 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4546 if not skip_where_token and not self._match(TokenType.PREWHERE): 4547 return None 4548 4549 return self.expression( 4550 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4551 ) 4552 4553 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4554 if not skip_where_token and not self._match(TokenType.WHERE): 4555 return None 4556 4557 return self.expression( 4558 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4559 ) 4560 4561 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4562 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4563 return None 4564 comments = self._prev_comments 4565 4566 elements: t.Dict[str, t.Any] = defaultdict(list) 4567 4568 if self._match(TokenType.ALL): 4569 elements["all"] = True 4570 elif self._match(TokenType.DISTINCT): 4571 elements["all"] = False 4572 4573 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4574 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4575 4576 while True: 4577 index = self._index 4578 4579 elements["expressions"].extend( 4580 self._parse_csv( 4581 lambda: None 4582 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4583 else self._parse_assignment() 4584 ) 4585 ) 4586 4587 
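# [Editor's note: illustrative sketch, not part of the original sqlglot source.]
# _parse_pivot above parses PIVOT/UNPIVOT blocks and, for PIVOT, precomputes output
# column names by taking the itertools.product of the IN-list values (plus any
# aggregation aliases). A hedged example (SQL text and dialect choice are mine; the
# generated "columns" feed downstream star expansion):
#
#     import sqlglot
#     from sqlglot import exp
#
#     pivot = sqlglot.parse_one(
#         "SELECT * FROM sales PIVOT (SUM(amount) FOR region IN ('EU', 'US'))",
#         read="snowflake",
#     ).find(exp.Pivot)
#     pivot.args.get("fields")    # [exp.In: region IN ('EU', 'US')]
#     pivot.args.get("columns")   # generated identifiers, one per IN value here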
before_with_index = self._index 4588 with_prefix = self._match(TokenType.WITH) 4589 4590 cube_or_rollup = self._parse_cube_or_rollup(with_prefix=with_prefix) 4591 if cube_or_rollup: 4592 key = "rollup" if isinstance(cube_or_rollup, exp.Rollup) else "cube" 4593 elements[key].append(cube_or_rollup) 4594 elif self._match(TokenType.GROUPING_SETS): 4595 elements["grouping_sets"].append( 4596 self.expression( 4597 exp.GroupingSets, 4598 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4599 ) 4600 ) 4601 elif self._match_text_seq("TOTALS"): 4602 elements["totals"] = True # type: ignore 4603 4604 if before_with_index <= self._index <= before_with_index + 1: 4605 self._retreat(before_with_index) 4606 break 4607 4608 if index == self._index: 4609 break 4610 4611 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4612 4613 def _parse_cube_or_rollup(self, with_prefix: bool = False) -> t.Optional[exp.Cube | exp.Rollup]: 4614 if self._match(TokenType.CUBE): 4615 kind: t.Type[exp.Cube | exp.Rollup] = exp.Cube 4616 elif self._match(TokenType.ROLLUP): 4617 kind = exp.Rollup 4618 else: 4619 return None 4620 4621 return self.expression( 4622 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4623 ) 4624 4625 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4626 return self._parse_cube_or_rollup() or self._parse_bitwise() 4627 4628 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4629 if not skip_having_token and not self._match(TokenType.HAVING): 4630 return None 4631 return self.expression( 4632 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4633 ) 4634 4635 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4636 if not self._match(TokenType.QUALIFY): 4637 return None 4638 return self.expression(exp.Qualify, this=self._parse_assignment()) 4639 4640 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4641 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4642 exp.Prior, this=self._parse_bitwise() 4643 ) 4644 connect = self._parse_assignment() 4645 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4646 return connect 4647 4648 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4649 if skip_start_token: 4650 start = None 4651 elif self._match(TokenType.START_WITH): 4652 start = self._parse_assignment() 4653 else: 4654 return None 4655 4656 self._match(TokenType.CONNECT_BY) 4657 nocycle = self._match_text_seq("NOCYCLE") 4658 connect = self._parse_connect_with_prior() 4659 4660 if not start and self._match(TokenType.START_WITH): 4661 start = self._parse_assignment() 4662 4663 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4664 4665 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4666 this = self._parse_id_var(any_token=True) 4667 if self._match(TokenType.ALIAS): 4668 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4669 return this 4670 4671 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4672 if self._match_text_seq("INTERPOLATE"): 4673 return self._parse_wrapped_csv(self._parse_name_as_expression) 4674 return None 4675 4676 def _parse_order( 4677 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4678 ) -> t.Optional[exp.Expression]: 4679 siblings = None 4680 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4681 if not 
self._match(TokenType.ORDER_SIBLINGS_BY): 4682 return this 4683 4684 siblings = True 4685 4686 return self.expression( 4687 exp.Order, 4688 comments=self._prev_comments, 4689 this=this, 4690 expressions=self._parse_csv(self._parse_ordered), 4691 siblings=siblings, 4692 ) 4693 4694 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4695 if not self._match(token): 4696 return None 4697 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4698 4699 def _parse_ordered( 4700 self, parse_method: t.Optional[t.Callable] = None 4701 ) -> t.Optional[exp.Ordered]: 4702 this = parse_method() if parse_method else self._parse_assignment() 4703 if not this: 4704 return None 4705 4706 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4707 this = exp.var("ALL") 4708 4709 asc = self._match(TokenType.ASC) 4710 desc = self._match(TokenType.DESC) or (asc and False) 4711 4712 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4713 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4714 4715 nulls_first = is_nulls_first or False 4716 explicitly_null_ordered = is_nulls_first or is_nulls_last 4717 4718 if ( 4719 not explicitly_null_ordered 4720 and ( 4721 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4722 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4723 ) 4724 and self.dialect.NULL_ORDERING != "nulls_are_last" 4725 ): 4726 nulls_first = True 4727 4728 if self._match_text_seq("WITH", "FILL"): 4729 with_fill = self.expression( 4730 exp.WithFill, 4731 **{ # type: ignore 4732 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4733 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4734 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4735 "interpolate": self._parse_interpolate(), 4736 }, 4737 ) 4738 else: 4739 with_fill = None 4740 4741 return self.expression( 4742 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4743 ) 4744 4745 def _parse_limit_options(self) -> t.Optional[exp.LimitOptions]: 4746 percent = self._match_set((TokenType.PERCENT, TokenType.MOD)) 4747 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4748 self._match_text_seq("ONLY") 4749 with_ties = self._match_text_seq("WITH", "TIES") 4750 4751 if not (percent or rows or with_ties): 4752 return None 4753 4754 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4755 4756 def _parse_limit( 4757 self, 4758 this: t.Optional[exp.Expression] = None, 4759 top: bool = False, 4760 skip_limit_token: bool = False, 4761 ) -> t.Optional[exp.Expression]: 4762 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4763 comments = self._prev_comments 4764 if top: 4765 limit_paren = self._match(TokenType.L_PAREN) 4766 expression = self._parse_term() if limit_paren else self._parse_number() 4767 4768 if limit_paren: 4769 self._match_r_paren() 4770 4771 else: 4772 # Parsing LIMIT x% (i.e x PERCENT) as a term leads to an error, since 4773 # we try to build an exp.Mod expr. 
For that matter, we backtrack and instead 4774 # consume the factor plus parse the percentage separately 4775 expression = self._try_parse(self._parse_term) or self._parse_factor() 4776 4777 limit_options = self._parse_limit_options() 4778 4779 if self._match(TokenType.COMMA): 4780 offset = expression 4781 expression = self._parse_term() 4782 else: 4783 offset = None 4784 4785 limit_exp = self.expression( 4786 exp.Limit, 4787 this=this, 4788 expression=expression, 4789 offset=offset, 4790 comments=comments, 4791 limit_options=limit_options, 4792 expressions=self._parse_limit_by(), 4793 ) 4794 4795 return limit_exp 4796 4797 if self._match(TokenType.FETCH): 4798 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4799 direction = self._prev.text.upper() if direction else "FIRST" 4800 4801 count = self._parse_field(tokens=self.FETCH_TOKENS) 4802 4803 return self.expression( 4804 exp.Fetch, 4805 direction=direction, 4806 count=count, 4807 limit_options=self._parse_limit_options(), 4808 ) 4809 4810 return this 4811 4812 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4813 if not self._match(TokenType.OFFSET): 4814 return this 4815 4816 count = self._parse_term() 4817 self._match_set((TokenType.ROW, TokenType.ROWS)) 4818 4819 return self.expression( 4820 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4821 ) 4822 4823 def _can_parse_limit_or_offset(self) -> bool: 4824 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4825 return False 4826 4827 index = self._index 4828 result = bool( 4829 self._try_parse(self._parse_limit, retreat=True) 4830 or self._try_parse(self._parse_offset, retreat=True) 4831 ) 4832 self._retreat(index) 4833 return result 4834 4835 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4836 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4837 4838 def _parse_locks(self) -> t.List[exp.Lock]: 4839 locks = [] 4840 while True: 4841 update, key = None, None 4842 if self._match_text_seq("FOR", "UPDATE"): 4843 update = True 4844 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4845 "LOCK", "IN", "SHARE", "MODE" 4846 ): 4847 update = False 4848 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4849 update, key = False, True 4850 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4851 update, key = True, True 4852 else: 4853 break 4854 4855 expressions = None 4856 if self._match_text_seq("OF"): 4857 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4858 4859 wait: t.Optional[bool | exp.Expression] = None 4860 if self._match_text_seq("NOWAIT"): 4861 wait = True 4862 elif self._match_text_seq("WAIT"): 4863 wait = self._parse_primary() 4864 elif self._match_text_seq("SKIP", "LOCKED"): 4865 wait = False 4866 4867 locks.append( 4868 self.expression( 4869 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4870 ) 4871 ) 4872 4873 return locks 4874 4875 def parse_set_operation( 4876 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4877 ) -> t.Optional[exp.Expression]: 4878 start = self._index 4879 _, side_token, kind_token = self._parse_join_parts() 4880 4881 side = side_token.text if side_token else None 4882 kind = kind_token.text if kind_token else None 4883 4884 if not self._match_set(self.SET_OPERATIONS): 4885 self._retreat(start) 4886 return None 4887 4888 token_type = self._prev.token_type 4889 4890 if token_type == TokenType.UNION: 4891 operation: 
t.Type[exp.SetOperation] = exp.Union 4892 elif token_type == TokenType.EXCEPT: 4893 operation = exp.Except 4894 else: 4895 operation = exp.Intersect 4896 4897 comments = self._prev.comments 4898 4899 if self._match(TokenType.DISTINCT): 4900 distinct: t.Optional[bool] = True 4901 elif self._match(TokenType.ALL): 4902 distinct = False 4903 else: 4904 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4905 if distinct is None: 4906 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4907 4908 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4909 "STRICT", "CORRESPONDING" 4910 ) 4911 if self._match_text_seq("CORRESPONDING"): 4912 by_name = True 4913 if not side and not kind: 4914 kind = "INNER" 4915 4916 on_column_list = None 4917 if by_name and self._match_texts(("ON", "BY")): 4918 on_column_list = self._parse_wrapped_csv(self._parse_column) 4919 4920 expression = self._parse_select( 4921 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4922 ) 4923 4924 return self.expression( 4925 operation, 4926 comments=comments, 4927 this=this, 4928 distinct=distinct, 4929 by_name=by_name, 4930 expression=expression, 4931 side=side, 4932 kind=kind, 4933 on=on_column_list, 4934 ) 4935 4936 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4937 while this: 4938 setop = self.parse_set_operation(this) 4939 if not setop: 4940 break 4941 this = setop 4942 4943 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4944 expression = this.expression 4945 4946 if expression: 4947 for arg in self.SET_OP_MODIFIERS: 4948 expr = expression.args.get(arg) 4949 if expr: 4950 this.set(arg, expr.pop()) 4951 4952 return this 4953 4954 def _parse_expression(self) -> t.Optional[exp.Expression]: 4955 return self._parse_alias(self._parse_assignment()) 4956 4957 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4958 this = self._parse_disjunction() 4959 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4960 # This allows us to parse <non-identifier token> := <expr> 4961 this = exp.column( 4962 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4963 ) 4964 4965 while self._match_set(self.ASSIGNMENT): 4966 if isinstance(this, exp.Column) and len(this.parts) == 1: 4967 this = this.this 4968 4969 this = self.expression( 4970 self.ASSIGNMENT[self._prev.token_type], 4971 this=this, 4972 comments=self._prev_comments, 4973 expression=self._parse_assignment(), 4974 ) 4975 4976 return this 4977 4978 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4979 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4980 4981 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4982 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4983 4984 def _parse_equality(self) -> t.Optional[exp.Expression]: 4985 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4986 4987 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4988 return self._parse_tokens(self._parse_range, self.COMPARISON) 4989 4990 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4991 this = this or self._parse_bitwise() 4992 negate = self._match(TokenType.NOT) 4993 4994 if self._match_set(self.RANGE_PARSERS): 4995 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4996 if not expression: 4997 return this 4998 4999 this = expression 5000 elif self._match(TokenType.ISNULL): 
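# Postfix ISNULL (Postgres shorthand) is normalized to a canonical `x IS NULL` expression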
5001 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5002 5003 # Postgres supports ISNULL and NOTNULL for conditions. 5004 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5005 if self._match(TokenType.NOTNULL): 5006 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5007 this = self.expression(exp.Not, this=this) 5008 5009 if negate: 5010 this = self._negate_range(this) 5011 5012 if self._match(TokenType.IS): 5013 this = self._parse_is(this) 5014 5015 return this 5016 5017 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5018 if not this: 5019 return this 5020 5021 return self.expression(exp.Not, this=this) 5022 5023 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5024 index = self._index - 1 5025 negate = self._match(TokenType.NOT) 5026 5027 if self._match_text_seq("DISTINCT", "FROM"): 5028 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5029 return self.expression(klass, this=this, expression=self._parse_bitwise()) 5030 5031 if self._match(TokenType.JSON): 5032 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5033 5034 if self._match_text_seq("WITH"): 5035 _with = True 5036 elif self._match_text_seq("WITHOUT"): 5037 _with = False 5038 else: 5039 _with = None 5040 5041 unique = self._match(TokenType.UNIQUE) 5042 self._match_text_seq("KEYS") 5043 expression: t.Optional[exp.Expression] = self.expression( 5044 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5045 ) 5046 else: 5047 expression = self._parse_primary() or self._parse_null() 5048 if not expression: 5049 self._retreat(index) 5050 return None 5051 5052 this = self.expression(exp.Is, this=this, expression=expression) 5053 return self.expression(exp.Not, this=this) if negate else this 5054 5055 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5056 unnest = self._parse_unnest(with_alias=False) 5057 if unnest: 5058 this = self.expression(exp.In, this=this, unnest=unnest) 5059 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5060 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5061 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5062 5063 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5064 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5065 else: 5066 this = self.expression(exp.In, this=this, expressions=expressions) 5067 5068 if matched_l_paren: 5069 self._match_r_paren(this) 5070 elif not self._match(TokenType.R_BRACKET, expression=this): 5071 self.raise_error("Expecting ]") 5072 else: 5073 this = self.expression(exp.In, this=this, field=self._parse_column()) 5074 5075 return this 5076 5077 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5078 symmetric = None 5079 if self._match_text_seq("SYMMETRIC"): 5080 symmetric = True 5081 elif self._match_text_seq("ASYMMETRIC"): 5082 symmetric = False 5083 5084 low = self._parse_bitwise() 5085 self._match(TokenType.AND) 5086 high = self._parse_bitwise() 5087 5088 return self.expression( 5089 exp.Between, 5090 this=this, 5091 low=low, 5092 high=high, 5093 symmetric=symmetric, 5094 ) 5095 5096 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5097 if not self._match(TokenType.ESCAPE): 5098 return this 5099 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5100 5101 def 
_parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5102 index = self._index 5103 5104 if not self._match(TokenType.INTERVAL) and match_interval: 5105 return None 5106 5107 if self._match(TokenType.STRING, advance=False): 5108 this = self._parse_primary() 5109 else: 5110 this = self._parse_term() 5111 5112 if not this or ( 5113 isinstance(this, exp.Column) 5114 and not this.table 5115 and not this.this.quoted 5116 and self._curr 5117 and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS 5118 ): 5119 self._retreat(index) 5120 return None 5121 5122 # handle day-time format interval span with omitted units: 5123 # INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`> 5124 interval_span_units_omitted = None 5125 if ( 5126 this 5127 and this.is_string 5128 and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT 5129 and exp.INTERVAL_DAY_TIME_RE.match(this.name) 5130 ): 5131 index = self._index 5132 5133 # Var "TO" Var 5134 first_unit = self._parse_var(any_token=True, upper=True) 5135 second_unit = None 5136 if first_unit and self._match_text_seq("TO"): 5137 second_unit = self._parse_var(any_token=True, upper=True) 5138 5139 interval_span_units_omitted = not (first_unit and second_unit) 5140 5141 self._retreat(index) 5142 5143 unit = ( 5144 None 5145 if interval_span_units_omitted 5146 else ( 5147 self._parse_function() 5148 or ( 5149 not self._match(TokenType.ALIAS, advance=False) 5150 and self._parse_var(any_token=True, upper=True) 5151 ) 5152 ) 5153 ) 5154 5155 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5156 # each INTERVAL expression into this canonical form so it's easy to transpile 5157 if this and this.is_number: 5158 this = exp.Literal.string(this.to_py()) 5159 elif this and this.is_string: 5160 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5161 if parts and unit: 5162 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5163 unit = None 5164 self._retreat(self._index - 1) 5165 5166 if len(parts) == 1: 5167 this = exp.Literal.string(parts[0][0]) 5168 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5169 5170 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5171 unit = self.expression( 5172 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5173 ) 5174 5175 interval = self.expression(exp.Interval, this=this, unit=unit) 5176 5177 index = self._index 5178 self._match(TokenType.PLUS) 5179 5180 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 5181 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5182 return self.expression( 5183 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5184 ) 5185 5186 self._retreat(index) 5187 return interval 5188 5189 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5190 this = self._parse_term() 5191 5192 while True: 5193 if self._match_set(self.BITWISE): 5194 this = self.expression( 5195 self.BITWISE[self._prev.token_type], 5196 this=this, 5197 expression=self._parse_term(), 5198 ) 5199 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5200 this = self.expression( 5201 exp.DPipe, 5202 this=this, 5203 expression=self._parse_term(), 5204 safe=not self.dialect.STRICT_STRING_CONCAT, 5205 ) 5206 elif self._match(TokenType.DQMARK): 5207 this = self.expression( 5208 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5209 ) 5210 elif self._match_pair(TokenType.LT, TokenType.LT): 5211 this = self.expression( 5212 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5213 ) 5214 elif self._match_pair(TokenType.GT, TokenType.GT): 5215 this = self.expression( 5216 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5217 ) 5218 else: 5219 break 5220 5221 return this 5222 5223 def _parse_term(self) -> t.Optional[exp.Expression]: 5224 this = self._parse_factor() 5225 5226 while self._match_set(self.TERM): 5227 klass = self.TERM[self._prev.token_type] 5228 comments = self._prev_comments 5229 expression = self._parse_factor() 5230 5231 this = self.expression(klass, this=this, comments=comments, expression=expression) 5232 5233 if isinstance(this, exp.Collate): 5234 expr = this.expression 5235 5236 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5237 # fallback to Identifier / Var 5238 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5239 ident = expr.this 5240 if isinstance(ident, exp.Identifier): 5241 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5242 5243 return this 5244 5245 def _parse_factor(self) -> t.Optional[exp.Expression]: 5246 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5247 this = parse_method() 5248 5249 while self._match_set(self.FACTOR): 5250 klass = self.FACTOR[self._prev.token_type] 5251 comments = self._prev_comments 5252 expression = parse_method() 5253 5254 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5255 self._retreat(self._index - 1) 5256 return this 5257 5258 this = self.expression(klass, this=this, comments=comments, expression=expression) 5259 5260 if isinstance(this, exp.Div): 5261 this.args["typed"] = self.dialect.TYPED_DIVISION 5262 this.args["safe"] = self.dialect.SAFE_DIVISION 5263 5264 return this 5265 5266 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5267 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5268 5269 def _parse_unary(self) -> t.Optional[exp.Expression]: 5270 if self._match_set(self.UNARY_PARSERS): 5271 return self.UNARY_PARSERS[self._prev.token_type](self) 5272 return self._parse_at_time_zone(self._parse_type()) 5273 5274 def _parse_type( 5275 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5276 ) -> t.Optional[exp.Expression]: 5277 interval = parse_interval and self._parse_interval() 5278 if interval: 5279 return interval 5280 5281 index = self._index 5282 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5283 
5284 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5285 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5286 if isinstance(data_type, exp.Cast): 5287 # This constructor can contain ops directly after it, for instance struct unnesting: 5288 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5289 return self._parse_column_ops(data_type) 5290 5291 if data_type: 5292 index2 = self._index 5293 this = self._parse_primary() 5294 5295 if isinstance(this, exp.Literal): 5296 literal = this.name 5297 this = self._parse_column_ops(this) 5298 5299 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5300 if parser: 5301 return parser(self, this, data_type) 5302 5303 if ( 5304 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5305 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5306 and TIME_ZONE_RE.search(literal) 5307 ): 5308 data_type = exp.DataType.build("TIMESTAMPTZ") 5309 5310 return self.expression(exp.Cast, this=this, to=data_type) 5311 5312 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5313 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5314 # 5315 # If the index difference here is greater than 1, that means the parser itself must have 5316 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5317 # 5318 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5319 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5320 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5321 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5322 # 5323 # In these cases, we don't really want to return the converted type, but instead retreat 5324 # and try to parse a Column or Identifier in the section below.
5325 if data_type.expressions and index2 - index > 1: 5326 self._retreat(index2) 5327 return self._parse_column_ops(data_type) 5328 5329 self._retreat(index) 5330 5331 if fallback_to_identifier: 5332 return self._parse_id_var() 5333 5334 this = self._parse_column() 5335 return this and self._parse_column_ops(this) 5336 5337 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5338 this = self._parse_type() 5339 if not this: 5340 return None 5341 5342 if isinstance(this, exp.Column) and not this.table: 5343 this = exp.var(this.name.upper()) 5344 5345 return self.expression( 5346 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5347 ) 5348 5349 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5350 type_name = identifier.name 5351 5352 while self._match(TokenType.DOT): 5353 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5354 5355 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5356 5357 def _parse_types( 5358 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5359 ) -> t.Optional[exp.Expression]: 5360 index = self._index 5361 5362 this: t.Optional[exp.Expression] = None 5363 prefix = self._match_text_seq("SYSUDTLIB", ".") 5364 5365 if self._match_set(self.TYPE_TOKENS): 5366 type_token = self._prev.token_type 5367 else: 5368 type_token = None 5369 identifier = allow_identifiers and self._parse_id_var( 5370 any_token=False, tokens=(TokenType.VAR,) 5371 ) 5372 if isinstance(identifier, exp.Identifier): 5373 try: 5374 tokens = self.dialect.tokenize(identifier.name) 5375 except TokenError: 5376 tokens = None 5377 5378 if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: 5379 type_token = tokens[0].token_type 5380 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5381 this = self._parse_user_defined_type(identifier) 5382 else: 5383 self._retreat(self._index - 1) 5384 return None 5385 else: 5386 return None 5387 5388 if type_token == TokenType.PSEUDO_TYPE: 5389 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5390 5391 if type_token == TokenType.OBJECT_IDENTIFIER: 5392 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5393 5394 # https://materialize.com/docs/sql/types/map/ 5395 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5396 key_type = self._parse_types( 5397 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5398 ) 5399 if not self._match(TokenType.FARROW): 5400 self._retreat(index) 5401 return None 5402 5403 value_type = self._parse_types( 5404 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5405 ) 5406 if not self._match(TokenType.R_BRACKET): 5407 self._retreat(index) 5408 return None 5409 5410 return exp.DataType( 5411 this=exp.DataType.Type.MAP, 5412 expressions=[key_type, value_type], 5413 nested=True, 5414 prefix=prefix, 5415 ) 5416 5417 nested = type_token in self.NESTED_TYPE_TOKENS 5418 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5419 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5420 expressions = None 5421 maybe_func = False 5422 5423 if self._match(TokenType.L_PAREN): 5424 if is_struct: 5425 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5426 elif nested: 5427 expressions = self._parse_csv( 5428 lambda: self._parse_types( 5429 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5430 ) 5431 ) 5432 if type_token == 
TokenType.NULLABLE and len(expressions) == 1: 5433 this = expressions[0] 5434 this.set("nullable", True) 5435 self._match_r_paren() 5436 return this 5437 elif type_token in self.ENUM_TYPE_TOKENS: 5438 expressions = self._parse_csv(self._parse_equality) 5439 elif is_aggregate: 5440 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5441 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5442 ) 5443 if not func_or_ident: 5444 return None 5445 expressions = [func_or_ident] 5446 if self._match(TokenType.COMMA): 5447 expressions.extend( 5448 self._parse_csv( 5449 lambda: self._parse_types( 5450 check_func=check_func, 5451 schema=schema, 5452 allow_identifiers=allow_identifiers, 5453 ) 5454 ) 5455 ) 5456 else: 5457 expressions = self._parse_csv(self._parse_type_size) 5458 5459 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5460 if type_token == TokenType.VECTOR and len(expressions) == 2: 5461 expressions = self._parse_vector_expressions(expressions) 5462 5463 if not self._match(TokenType.R_PAREN): 5464 self._retreat(index) 5465 return None 5466 5467 maybe_func = True 5468 5469 values: t.Optional[t.List[exp.Expression]] = None 5470 5471 if nested and self._match(TokenType.LT): 5472 if is_struct: 5473 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5474 else: 5475 expressions = self._parse_csv( 5476 lambda: self._parse_types( 5477 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5478 ) 5479 ) 5480 5481 if not self._match(TokenType.GT): 5482 self.raise_error("Expecting >") 5483 5484 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5485 values = self._parse_csv(self._parse_assignment) 5486 if not values and is_struct: 5487 values = None 5488 self._retreat(self._index - 1) 5489 else: 5490 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5491 5492 if type_token in self.TIMESTAMPS: 5493 if self._match_text_seq("WITH", "TIME", "ZONE"): 5494 maybe_func = False 5495 tz_type = ( 5496 exp.DataType.Type.TIMETZ 5497 if type_token in self.TIMES 5498 else exp.DataType.Type.TIMESTAMPTZ 5499 ) 5500 this = exp.DataType(this=tz_type, expressions=expressions) 5501 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5502 maybe_func = False 5503 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5504 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5505 maybe_func = False 5506 elif type_token == TokenType.INTERVAL: 5507 unit = self._parse_var(upper=True) 5508 if unit: 5509 if self._match_text_seq("TO"): 5510 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5511 5512 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5513 else: 5514 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5515 elif type_token == TokenType.VOID: 5516 this = exp.DataType(this=exp.DataType.Type.NULL) 5517 5518 if maybe_func and check_func: 5519 index2 = self._index 5520 peek = self._parse_string() 5521 5522 if not peek: 5523 self._retreat(index) 5524 return None 5525 5526 self._retreat(index2) 5527 5528 if not this: 5529 if self._match_text_seq("UNSIGNED"): 5530 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5531 if not unsigned_type_token: 5532 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5533 5534 type_token = unsigned_type_token or type_token 5535 5536 # NULLABLE without parentheses can be a column (Presto/Trino) 5537 if type_token == 
TokenType.NULLABLE and not expressions: 5538 self._retreat(index) 5539 return None 5540 5541 this = exp.DataType( 5542 this=exp.DataType.Type[type_token.value], 5543 expressions=expressions, 5544 nested=nested, 5545 prefix=prefix, 5546 ) 5547 5548 # Empty arrays/structs are allowed 5549 if values is not None: 5550 cls = exp.Struct if is_struct else exp.Array 5551 this = exp.cast(cls(expressions=values), this, copy=False) 5552 5553 elif expressions: 5554 this.set("expressions", expressions) 5555 5556 # https://materialize.com/docs/sql/types/list/#type-name 5557 while self._match(TokenType.LIST): 5558 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5559 5560 index = self._index 5561 5562 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5563 matched_array = self._match(TokenType.ARRAY) 5564 5565 while self._curr: 5566 datatype_token = self._prev.token_type 5567 matched_l_bracket = self._match(TokenType.L_BRACKET) 5568 5569 if (not matched_l_bracket and not matched_array) or ( 5570 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5571 ): 5572 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5573 # not to be confused with the fixed size array parsing 5574 break 5575 5576 matched_array = False 5577 values = self._parse_csv(self._parse_assignment) or None 5578 if ( 5579 values 5580 and not schema 5581 and ( 5582 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5583 ) 5584 ): 5585 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5586 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5587 self._retreat(index) 5588 break 5589 5590 this = exp.DataType( 5591 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5592 ) 5593 self._match(TokenType.R_BRACKET) 5594 5595 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5596 converter = self.TYPE_CONVERTERS.get(this.this) 5597 if converter: 5598 this = converter(t.cast(exp.DataType, this)) 5599 5600 return this 5601 5602 def _parse_vector_expressions( 5603 self, expressions: t.List[exp.Expression] 5604 ) -> t.List[exp.Expression]: 5605 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 5606 5607 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5608 index = self._index 5609 5610 if ( 5611 self._curr 5612 and self._next 5613 and self._curr.token_type in self.TYPE_TOKENS 5614 and self._next.token_type in self.TYPE_TOKENS 5615 ): 5616 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5617 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5618 this = self._parse_id_var() 5619 else: 5620 this = ( 5621 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5622 or self._parse_id_var() 5623 ) 5624 5625 self._match(TokenType.COLON) 5626 5627 if ( 5628 type_required 5629 and not isinstance(this, exp.DataType) 5630 and not self._match_set(self.TYPE_TOKENS, advance=False) 5631 ): 5632 self._retreat(index) 5633 return self._parse_types() 5634 5635 return self._parse_column_def(this) 5636 5637 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5638 if not self._match_text_seq("AT", "TIME", "ZONE"): 5639 return this 5640 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5641 5642 def _parse_column(self) -> t.Optional[exp.Expression]: 5643 this = self._parse_column_reference() 5644 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5645 5646 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5647 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5648 5649 return column 5650 5651 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5652 this = self._parse_field() 5653 if ( 5654 not this 5655 and self._match(TokenType.VALUES, advance=False) 5656 and self.VALUES_FOLLOWED_BY_PAREN 5657 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5658 ): 5659 this = self._parse_id_var() 5660 5661 if isinstance(this, exp.Identifier): 5662 # We bubble up comments from the Identifier to the Column 5663 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5664 5665 return this 5666 5667 def _parse_colon_as_variant_extract( 5668 self, this: t.Optional[exp.Expression] 5669 ) -> t.Optional[exp.Expression]: 5670 casts = [] 5671 json_path = [] 5672 escape = None 5673 5674 while self._match(TokenType.COLON): 5675 start_index = self._index 5676 5677 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5678 path = self._parse_column_ops( 5679 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5680 ) 5681 5682 # The cast :: operator has a lower precedence than the extraction operator :, so 5683 # we rearrange the AST appropriately to avoid casting the JSON path 5684 while isinstance(path, exp.Cast): 5685 casts.append(path.to) 5686 path = path.this 5687 5688 if casts: 5689 dcolon_offset = next( 5690 i 5691 for i, t in enumerate(self._tokens[start_index:]) 5692 if t.token_type == TokenType.DCOLON 5693 ) 5694 end_token = self._tokens[start_index + dcolon_offset - 1] 5695 else: 5696 end_token = self._prev 5697 5698 if path: 5699 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5700 # it'll roundtrip to a string literal in GET_PATH 5701 if isinstance(path, exp.Identifier) and path.quoted: 5702 escape = True 5703 5704 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5705 5706 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5707 # Databricks transforms it back to the colon/dot notation 5708 if json_path: 5709 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5710 5711 if json_path_expr: 5712 json_path_expr.set("escape", escape) 5713 5714 this = self.expression( 5715 exp.JSONExtract, 5716 this=this, 5717 expression=json_path_expr, 5718 variant_extract=True, 5719 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5720 ) 5721 5722 while casts: 5723 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5724 5725 return this 5726 5727 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5728 return self._parse_types() 5729 5730 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5731 this = self._parse_bracket(this) 5732 5733 while self._match_set(self.COLUMN_OPERATORS): 5734 op_token = self._prev.token_type 5735 op = self.COLUMN_OPERATORS.get(op_token) 5736 5737 if op_token in self.CAST_COLUMN_OPERATORS: 5738 field = self._parse_dcolon() 5739 if not field: 5740 self.raise_error("Expected type") 5741 elif op and self._curr: 5742 field = self._parse_column_reference() or self._parse_bitwise() 5743 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5744 field = self._parse_column_ops(field) 5745 else: 5746 field = self._parse_field(any_token=True, anonymous_func=True) 5747 5748 # Function calls can be qualified, e.g., x.y.FOO() 5749 # This converts the final AST to a series of Dots leading to the function call 5750 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5751 if isinstance(field, (exp.Func, exp.Window)) and this: 5752 this = this.transform( 5753 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5754 ) 5755 5756 if op: 5757 this = op(self, this, field) 5758 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5759 this = self.expression( 5760 exp.Column, 5761 comments=this.comments, 5762 this=field, 5763 table=this.this, 5764 db=this.args.get("table"), 5765 catalog=this.args.get("db"), 5766 ) 5767 elif isinstance(field, exp.Window): 5768 # Move the exp.Dot's to the window's function 5769 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5770 field.set("this", window_func) 5771 this = field 5772 else: 5773 this = self.expression(exp.Dot, this=this, expression=field) 5774 5775 if field and field.comments: 5776 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5777 5778 this = self._parse_bracket(this) 5779 5780 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5781 5782 def _parse_paren(self) -> t.Optional[exp.Expression]: 5783 if not self._match(TokenType.L_PAREN): 5784 return None 5785 5786 comments = self._prev_comments 5787 query = self._parse_select() 5788 5789 if query: 5790 expressions = [query] 5791 else: 5792 expressions = self._parse_expressions() 5793 5794 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5795 5796 if not this and self._match(TokenType.R_PAREN, advance=False): 5797 this = self.expression(exp.Tuple) 5798 elif isinstance(this, 
exp.UNWRAPPED_QUERIES): 5799 this = self._parse_subquery(this=this, parse_alias=False) 5800 elif isinstance(this, exp.Subquery): 5801 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5802 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5803 this = self.expression(exp.Tuple, expressions=expressions) 5804 else: 5805 this = self.expression(exp.Paren, this=this) 5806 5807 if this: 5808 this.add_comments(comments) 5809 5810 self._match_r_paren(expression=this) 5811 5812 if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc): 5813 return self._parse_window(this) 5814 5815 return this 5816 5817 def _parse_primary(self) -> t.Optional[exp.Expression]: 5818 if self._match_set(self.PRIMARY_PARSERS): 5819 token_type = self._prev.token_type 5820 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5821 5822 if token_type == TokenType.STRING: 5823 expressions = [primary] 5824 while self._match(TokenType.STRING): 5825 expressions.append(exp.Literal.string(self._prev.text)) 5826 5827 if len(expressions) > 1: 5828 return self.expression(exp.Concat, expressions=expressions) 5829 5830 return primary 5831 5832 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5833 return exp.Literal.number(f"0.{self._prev.text}") 5834 5835 return self._parse_paren() 5836 5837 def _parse_field( 5838 self, 5839 any_token: bool = False, 5840 tokens: t.Optional[t.Collection[TokenType]] = None, 5841 anonymous_func: bool = False, 5842 ) -> t.Optional[exp.Expression]: 5843 if anonymous_func: 5844 field = ( 5845 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5846 or self._parse_primary() 5847 ) 5848 else: 5849 field = self._parse_primary() or self._parse_function( 5850 anonymous=anonymous_func, any_token=any_token 5851 ) 5852 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5853 5854 def _parse_function( 5855 self, 5856 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5857 anonymous: bool = False, 5858 optional_parens: bool = True, 5859 any_token: bool = False, 5860 ) -> t.Optional[exp.Expression]: 5861 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5862 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5863 fn_syntax = False 5864 if ( 5865 self._match(TokenType.L_BRACE, advance=False) 5866 and self._next 5867 and self._next.text.upper() == "FN" 5868 ): 5869 self._advance(2) 5870 fn_syntax = True 5871 5872 func = self._parse_function_call( 5873 functions=functions, 5874 anonymous=anonymous, 5875 optional_parens=optional_parens, 5876 any_token=any_token, 5877 ) 5878 5879 if fn_syntax: 5880 self._match(TokenType.R_BRACE) 5881 5882 return func 5883 5884 def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]: 5885 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5886 5887 def _parse_function_call( 5888 self, 5889 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5890 anonymous: bool = False, 5891 optional_parens: bool = True, 5892 any_token: bool = False, 5893 ) -> t.Optional[exp.Expression]: 5894 if not self._curr: 5895 return None 5896 5897 comments = self._curr.comments 5898 prev = self._prev 5899 token = self._curr 5900 token_type = self._curr.token_type 5901 this = self._curr.text 5902 upper = this.upper() 5903 5904 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5905 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5906 self._advance() 5907 return 
self._parse_window(parser(self)) 5908 5909 if not self._next or self._next.token_type != TokenType.L_PAREN: 5910 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5911 self._advance() 5912 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5913 5914 return None 5915 5916 if any_token: 5917 if token_type in self.RESERVED_TOKENS: 5918 return None 5919 elif token_type not in self.FUNC_TOKENS: 5920 return None 5921 5922 self._advance(2) 5923 5924 parser = self.FUNCTION_PARSERS.get(upper) 5925 if parser and not anonymous: 5926 this = parser(self) 5927 else: 5928 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5929 5930 if subquery_predicate: 5931 expr = None 5932 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5933 expr = self._parse_select() 5934 self._match_r_paren() 5935 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5936 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5937 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5938 self._advance(-1) 5939 expr = self._parse_bitwise() 5940 5941 if expr: 5942 return self.expression(subquery_predicate, comments=comments, this=expr) 5943 5944 if functions is None: 5945 functions = self.FUNCTIONS 5946 5947 function = functions.get(upper) 5948 known_function = function and not anonymous 5949 5950 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5951 args = self._parse_function_args(alias) 5952 5953 post_func_comments = self._curr and self._curr.comments 5954 if known_function and post_func_comments: 5955 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5956 # call we'll construct it as exp.Anonymous, even if it's "known" 5957 if any( 5958 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5959 for comment in post_func_comments 5960 ): 5961 known_function = False 5962 5963 if alias and known_function: 5964 args = self._kv_to_prop_eq(args) 5965 5966 if known_function: 5967 func_builder = t.cast(t.Callable, function) 5968 5969 if "dialect" in func_builder.__code__.co_varnames: 5970 func = func_builder(args, dialect=self.dialect) 5971 else: 5972 func = func_builder(args) 5973 5974 func = self.validate_expression(func, args) 5975 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5976 func.meta["name"] = this 5977 5978 this = func 5979 else: 5980 if token_type == TokenType.IDENTIFIER: 5981 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5982 5983 this = self.expression(exp.Anonymous, this=this, expressions=args) 5984 this = this.update_positions(token) 5985 5986 if isinstance(this, exp.Expression): 5987 this.add_comments(comments) 5988 5989 self._match_r_paren(this) 5990 return self._parse_window(this) 5991 5992 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5993 return expression 5994 5995 def _kv_to_prop_eq( 5996 self, expressions: t.List[exp.Expression], parse_map: bool = False 5997 ) -> t.List[exp.Expression]: 5998 transformed = [] 5999 6000 for index, e in enumerate(expressions): 6001 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 6002 if isinstance(e, exp.Alias): 6003 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 6004 6005 if not isinstance(e, exp.PropertyEQ): 6006 e = self.expression( 6007 exp.PropertyEQ, 6008 this=e.this if parse_map else exp.to_identifier(e.this.name), 6009 expression=e.expression, 6010 ) 6011 6012 if isinstance(e.this, exp.Column): 6013 e.this.replace(e.this.this) 
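# Other expressions fall through to the _to_prop_eq hook defined above, which the base parser leaves as a no-op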
6014 else: 6015 e = self._to_prop_eq(e, index) 6016 6017 transformed.append(e) 6018 6019 return transformed 6020 6021 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 6022 return self._parse_statement() 6023 6024 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 6025 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 6026 6027 def _parse_user_defined_function( 6028 self, kind: t.Optional[TokenType] = None 6029 ) -> t.Optional[exp.Expression]: 6030 this = self._parse_table_parts(schema=True) 6031 6032 if not self._match(TokenType.L_PAREN): 6033 return this 6034 6035 expressions = self._parse_csv(self._parse_function_parameter) 6036 self._match_r_paren() 6037 return self.expression( 6038 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 6039 ) 6040 6041 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 6042 literal = self._parse_primary() 6043 if literal: 6044 return self.expression(exp.Introducer, this=token.text, expression=literal) 6045 6046 return self._identifier_expression(token) 6047 6048 def _parse_session_parameter(self) -> exp.SessionParameter: 6049 kind = None 6050 this = self._parse_id_var() or self._parse_primary() 6051 6052 if this and self._match(TokenType.DOT): 6053 kind = this.name 6054 this = self._parse_var() or self._parse_primary() 6055 6056 return self.expression(exp.SessionParameter, this=this, kind=kind) 6057 6058 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 6059 return self._parse_id_var() 6060 6061 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 6062 index = self._index 6063 6064 if self._match(TokenType.L_PAREN): 6065 expressions = t.cast( 6066 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 6067 ) 6068 6069 if not self._match(TokenType.R_PAREN): 6070 self._retreat(index) 6071 else: 6072 expressions = [self._parse_lambda_arg()] 6073 6074 if self._match_set(self.LAMBDAS): 6075 return self.LAMBDAS[self._prev.token_type](self, expressions) 6076 6077 self._retreat(index) 6078 6079 this: t.Optional[exp.Expression] 6080 6081 if self._match(TokenType.DISTINCT): 6082 this = self.expression( 6083 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 6084 ) 6085 else: 6086 this = self._parse_select_or_expression(alias=alias) 6087 6088 return self._parse_limit( 6089 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6090 ) 6091 6092 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6093 index = self._index 6094 if not self._match(TokenType.L_PAREN): 6095 return this 6096 6097 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6098 # expr can be of both types 6099 if self._match_set(self.SELECT_START_TOKENS): 6100 self._retreat(index) 6101 return this 6102 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6103 self._match_r_paren() 6104 return self.expression(exp.Schema, this=this, expressions=args) 6105 6106 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6107 return self._parse_column_def(self._parse_field(any_token=True)) 6108 6109 def _parse_column_def( 6110 self, this: t.Optional[exp.Expression], computed_column: bool = True 6111 ) -> t.Optional[exp.Expression]: 6112 # column defs are not really columns, they're identifiers 6113 if isinstance(this, exp.Column): 6114 this = this.this 6115 6116 if not computed_column: 6117 self._match(TokenType.ALIAS) 6118 6119 kind = self._parse_types(schema=True) 6120 6121 if self._match_text_seq("FOR", "ORDINALITY"): 6122 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6123 6124 constraints: t.List[exp.Expression] = [] 6125 6126 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6127 ("ALIAS", "MATERIALIZED") 6128 ): 6129 persisted = self._prev.text.upper() == "MATERIALIZED" 6130 constraint_kind = exp.ComputedColumnConstraint( 6131 this=self._parse_assignment(), 6132 persisted=persisted or self._match_text_seq("PERSISTED"), 6133 data_type=exp.Var(this="AUTO") 6134 if self._match_text_seq("AUTO") 6135 else self._parse_types(), 6136 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6137 ) 6138 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6139 elif ( 6140 kind 6141 and self._match(TokenType.ALIAS, advance=False) 6142 and ( 6143 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6144 or (self._next and self._next.token_type == TokenType.L_PAREN) 6145 ) 6146 ): 6147 self._advance() 6148 constraints.append( 6149 self.expression( 6150 exp.ColumnConstraint, 6151 kind=exp.ComputedColumnConstraint( 6152 this=self._parse_disjunction(), 6153 persisted=self._match_texts(("STORED", "VIRTUAL")) 6154 and self._prev.text.upper() == "STORED", 6155 ), 6156 ) 6157 ) 6158 6159 while True: 6160 constraint = self._parse_column_constraint() 6161 if not constraint: 6162 break 6163 constraints.append(constraint) 6164 6165 if not kind and not constraints: 6166 return this 6167 6168 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6169 6170 def _parse_auto_increment( 6171 self, 6172 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6173 start = None 6174 increment = None 6175 order = None 6176 6177 if self._match(TokenType.L_PAREN, advance=False): 6178 args = self._parse_wrapped_csv(self._parse_bitwise) 6179 start = seq_get(args, 0) 6180 increment = seq_get(args, 1) 6181 elif self._match_text_seq("START"): 6182 start = self._parse_bitwise() 6183 self._match_text_seq("INCREMENT") 6184 increment = self._parse_bitwise() 6185 if self._match_text_seq("ORDER"): 6186 order = True 6187 elif self._match_text_seq("NOORDER"): 6188 order = False 6189 6190 if start and increment: 6191 return exp.GeneratedAsIdentityColumnConstraint( 6192 start=start, increment=increment, this=False, order=order 6193 ) 6194 6195 return exp.AutoIncrementColumnConstraint() 6196 6197 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6198 if not self._match_text_seq("REFRESH"): 6199 self._retreat(self._index - 1) 6200 return None 6201 return self.expression(exp.AutoRefreshProperty, 
this=self._parse_var(upper=True)) 6202 6203 def _parse_compress(self) -> exp.CompressColumnConstraint: 6204 if self._match(TokenType.L_PAREN, advance=False): 6205 return self.expression( 6206 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6207 ) 6208 6209 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6210 6211 def _parse_generated_as_identity( 6212 self, 6213 ) -> ( 6214 exp.GeneratedAsIdentityColumnConstraint 6215 | exp.ComputedColumnConstraint 6216 | exp.GeneratedAsRowColumnConstraint 6217 ): 6218 if self._match_text_seq("BY", "DEFAULT"): 6219 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6220 this = self.expression( 6221 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6222 ) 6223 else: 6224 self._match_text_seq("ALWAYS") 6225 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6226 6227 self._match(TokenType.ALIAS) 6228 6229 if self._match_text_seq("ROW"): 6230 start = self._match_text_seq("START") 6231 if not start: 6232 self._match(TokenType.END) 6233 hidden = self._match_text_seq("HIDDEN") 6234 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6235 6236 identity = self._match_text_seq("IDENTITY") 6237 6238 if self._match(TokenType.L_PAREN): 6239 if self._match(TokenType.START_WITH): 6240 this.set("start", self._parse_bitwise()) 6241 if self._match_text_seq("INCREMENT", "BY"): 6242 this.set("increment", self._parse_bitwise()) 6243 if self._match_text_seq("MINVALUE"): 6244 this.set("minvalue", self._parse_bitwise()) 6245 if self._match_text_seq("MAXVALUE"): 6246 this.set("maxvalue", self._parse_bitwise()) 6247 6248 if self._match_text_seq("CYCLE"): 6249 this.set("cycle", True) 6250 elif self._match_text_seq("NO", "CYCLE"): 6251 this.set("cycle", False) 6252 6253 if not identity: 6254 this.set("expression", self._parse_range()) 6255 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6256 args = self._parse_csv(self._parse_bitwise) 6257 this.set("start", seq_get(args, 0)) 6258 this.set("increment", seq_get(args, 1)) 6259 6260 self._match_r_paren() 6261 6262 return this 6263 6264 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6265 self._match_text_seq("LENGTH") 6266 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6267 6268 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6269 if self._match_text_seq("NULL"): 6270 return self.expression(exp.NotNullColumnConstraint) 6271 if self._match_text_seq("CASESPECIFIC"): 6272 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6273 if self._match_text_seq("FOR", "REPLICATION"): 6274 return self.expression(exp.NotForReplicationColumnConstraint) 6275 6276 # Unconsume the `NOT` token 6277 self._retreat(self._index - 1) 6278 return None 6279 6280 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6281 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6282 6283 procedure_option_follows = ( 6284 self._match(TokenType.WITH, advance=False) 6285 and self._next 6286 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6287 ) 6288 6289 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6290 return self.expression( 6291 exp.ColumnConstraint, 6292 this=this, 6293 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6294 ) 6295 6296 return this 6297 6298 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6299 if not 
self._match(TokenType.CONSTRAINT): 6300 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6301 6302 return self.expression( 6303 exp.Constraint, 6304 this=self._parse_id_var(), 6305 expressions=self._parse_unnamed_constraints(), 6306 ) 6307 6308 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6309 constraints = [] 6310 while True: 6311 constraint = self._parse_unnamed_constraint() or self._parse_function() 6312 if not constraint: 6313 break 6314 constraints.append(constraint) 6315 6316 return constraints 6317 6318 def _parse_unnamed_constraint( 6319 self, constraints: t.Optional[t.Collection[str]] = None 6320 ) -> t.Optional[exp.Expression]: 6321 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6322 constraints or self.CONSTRAINT_PARSERS 6323 ): 6324 return None 6325 6326 constraint = self._prev.text.upper() 6327 if constraint not in self.CONSTRAINT_PARSERS: 6328 self.raise_error(f"No parser found for schema constraint {constraint}.") 6329 6330 return self.CONSTRAINT_PARSERS[constraint](self) 6331 6332 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6333 return self._parse_id_var(any_token=False) 6334 6335 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6336 self._match_texts(("KEY", "INDEX")) 6337 return self.expression( 6338 exp.UniqueColumnConstraint, 6339 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6340 this=self._parse_schema(self._parse_unique_key()), 6341 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6342 on_conflict=self._parse_on_conflict(), 6343 options=self._parse_key_constraint_options(), 6344 ) 6345 6346 def _parse_key_constraint_options(self) -> t.List[str]: 6347 options = [] 6348 while True: 6349 if not self._curr: 6350 break 6351 6352 if self._match(TokenType.ON): 6353 action = None 6354 on = self._advance_any() and self._prev.text 6355 6356 if self._match_text_seq("NO", "ACTION"): 6357 action = "NO ACTION" 6358 elif self._match_text_seq("CASCADE"): 6359 action = "CASCADE" 6360 elif self._match_text_seq("RESTRICT"): 6361 action = "RESTRICT" 6362 elif self._match_pair(TokenType.SET, TokenType.NULL): 6363 action = "SET NULL" 6364 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6365 action = "SET DEFAULT" 6366 else: 6367 self.raise_error("Invalid key constraint") 6368 6369 options.append(f"ON {on} {action}") 6370 else: 6371 var = self._parse_var_from_options( 6372 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6373 ) 6374 if not var: 6375 break 6376 options.append(var.name) 6377 6378 return options 6379 6380 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6381 if match and not self._match(TokenType.REFERENCES): 6382 return None 6383 6384 expressions = None 6385 this = self._parse_table(schema=True) 6386 options = self._parse_key_constraint_options() 6387 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6388 6389 def _parse_foreign_key(self) -> exp.ForeignKey: 6390 expressions = ( 6391 self._parse_wrapped_id_vars() 6392 if not self._match(TokenType.REFERENCES, advance=False) 6393 else None 6394 ) 6395 reference = self._parse_references() 6396 on_options = {} 6397 6398 while self._match(TokenType.ON): 6399 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6400 self.raise_error("Expected DELETE or UPDATE") 6401 6402 kind = self._prev.text.lower() 6403 6404 if self._match_text_seq("NO", "ACTION"): 6405 action = "NO ACTION" 6406 elif 
self._match(TokenType.SET): 6407 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6408 action = "SET " + self._prev.text.upper() 6409 else: 6410 self._advance() 6411 action = self._prev.text.upper() 6412 6413 on_options[kind] = action 6414 6415 return self.expression( 6416 exp.ForeignKey, 6417 expressions=expressions, 6418 reference=reference, 6419 options=self._parse_key_constraint_options(), 6420 **on_options, # type: ignore 6421 ) 6422 6423 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6424 return self._parse_ordered() or self._parse_field() 6425 6426 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6427 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6428 self._retreat(self._index - 1) 6429 return None 6430 6431 id_vars = self._parse_wrapped_id_vars() 6432 return self.expression( 6433 exp.PeriodForSystemTimeConstraint, 6434 this=seq_get(id_vars, 0), 6435 expression=seq_get(id_vars, 1), 6436 ) 6437 6438 def _parse_primary_key( 6439 self, wrapped_optional: bool = False, in_props: bool = False 6440 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6441 desc = ( 6442 self._match_set((TokenType.ASC, TokenType.DESC)) 6443 and self._prev.token_type == TokenType.DESC 6444 ) 6445 6446 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6447 return self.expression( 6448 exp.PrimaryKeyColumnConstraint, 6449 desc=desc, 6450 options=self._parse_key_constraint_options(), 6451 ) 6452 6453 expressions = self._parse_wrapped_csv( 6454 self._parse_primary_key_part, optional=wrapped_optional 6455 ) 6456 6457 return self.expression( 6458 exp.PrimaryKey, 6459 expressions=expressions, 6460 include=self._parse_index_params(), 6461 options=self._parse_key_constraint_options(), 6462 ) 6463 6464 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6465 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6466 6467 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6468 """ 6469 Parses a datetime column in ODBC format. We parse the column into the corresponding 6470 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6471 same as we did for `DATE('yyyy-mm-dd')`. 
6472 6473 Reference: 6474 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6475 """ 6476 self._match(TokenType.VAR) 6477 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6478 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6479 if not self._match(TokenType.R_BRACE): 6480 self.raise_error("Expected }") 6481 return expression 6482 6483 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6484 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6485 return this 6486 6487 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6488 map_token = seq_get(self._tokens, self._index - 2) 6489 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6490 else: 6491 parse_map = False 6492 6493 bracket_kind = self._prev.token_type 6494 if ( 6495 bracket_kind == TokenType.L_BRACE 6496 and self._curr 6497 and self._curr.token_type == TokenType.VAR 6498 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6499 ): 6500 return self._parse_odbc_datetime_literal() 6501 6502 expressions = self._parse_csv( 6503 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6504 ) 6505 6506 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6507 self.raise_error("Expected ]") 6508 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6509 self.raise_error("Expected }") 6510 6511 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6512 if bracket_kind == TokenType.L_BRACE: 6513 this = self.expression( 6514 exp.Struct, 6515 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6516 ) 6517 elif not this: 6518 this = build_array_constructor( 6519 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6520 ) 6521 else: 6522 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6523 if constructor_type: 6524 return build_array_constructor( 6525 constructor_type, 6526 args=expressions, 6527 bracket_kind=bracket_kind, 6528 dialect=self.dialect, 6529 ) 6530 6531 expressions = apply_index_offset( 6532 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6533 ) 6534 this = self.expression( 6535 exp.Bracket, 6536 this=this, 6537 expressions=expressions, 6538 comments=this.pop_comments(), 6539 ) 6540 6541 self._add_comments(this) 6542 return self._parse_bracket(this) 6543 6544 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6545 if self._match(TokenType.COLON): 6546 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6547 return this 6548 6549 def _parse_case(self) -> t.Optional[exp.Expression]: 6550 if self._match(TokenType.DOT, advance=False): 6551 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 6552 self._retreat(self._index - 1) 6553 return None 6554 6555 ifs = [] 6556 default = None 6557 6558 comments = self._prev_comments 6559 expression = self._parse_assignment() 6560 6561 while self._match(TokenType.WHEN): 6562 this = self._parse_assignment() 6563 self._match(TokenType.THEN) 6564 then = self._parse_assignment() 6565 ifs.append(self.expression(exp.If, this=this, true=then)) 6566 6567 if self._match(TokenType.ELSE): 6568 default = self._parse_assignment() 6569 6570 if not self._match(TokenType.END): 6571 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6572 default 
= exp.column("interval") 6573 else: 6574 self.raise_error("Expected END after CASE", self._prev) 6575 6576 return self.expression( 6577 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6578 ) 6579 6580 def _parse_if(self) -> t.Optional[exp.Expression]: 6581 if self._match(TokenType.L_PAREN): 6582 args = self._parse_csv( 6583 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6584 ) 6585 this = self.validate_expression(exp.If.from_arg_list(args), args) 6586 self._match_r_paren() 6587 else: 6588 index = self._index - 1 6589 6590 if self.NO_PAREN_IF_COMMANDS and index == 0: 6591 return self._parse_as_command(self._prev) 6592 6593 condition = self._parse_assignment() 6594 6595 if not condition: 6596 self._retreat(index) 6597 return None 6598 6599 self._match(TokenType.THEN) 6600 true = self._parse_assignment() 6601 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6602 self._match(TokenType.END) 6603 this = self.expression(exp.If, this=condition, true=true, false=false) 6604 6605 return this 6606 6607 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6608 if not self._match_text_seq("VALUE", "FOR"): 6609 self._retreat(self._index - 1) 6610 return None 6611 6612 return self.expression( 6613 exp.NextValueFor, 6614 this=self._parse_column(), 6615 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6616 ) 6617 6618 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6619 this = self._parse_function() or self._parse_var_or_string(upper=True) 6620 6621 if self._match(TokenType.FROM): 6622 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6623 6624 if not self._match(TokenType.COMMA): 6625 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6626 6627 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6628 6629 def _parse_gap_fill(self) -> exp.GapFill: 6630 self._match(TokenType.TABLE) 6631 this = self._parse_table() 6632 6633 self._match(TokenType.COMMA) 6634 args = [this, *self._parse_csv(self._parse_lambda)] 6635 6636 gap_fill = exp.GapFill.from_arg_list(args) 6637 return self.validate_expression(gap_fill, args) 6638 6639 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6640 this = self._parse_assignment() 6641 6642 if not self._match(TokenType.ALIAS): 6643 if self._match(TokenType.COMMA): 6644 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6645 6646 self.raise_error("Expected AS after CAST") 6647 6648 fmt = None 6649 to = self._parse_types() 6650 6651 default = self._match(TokenType.DEFAULT) 6652 if default: 6653 default = self._parse_bitwise() 6654 self._match_text_seq("ON", "CONVERSION", "ERROR") 6655 6656 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6657 fmt_string = self._parse_string() 6658 fmt = self._parse_at_time_zone(fmt_string) 6659 6660 if not to: 6661 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6662 if to.this in exp.DataType.TEMPORAL_TYPES: 6663 this = self.expression( 6664 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6665 this=this, 6666 format=exp.Literal.string( 6667 format_time( 6668 fmt_string.this if fmt_string else "", 6669 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6670 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6671 ) 6672 ), 6673 safe=safe, 6674 ) 6675 6676 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6677 this.set("zone", 
fmt.args["zone"]) 6678 return this 6679 elif not to: 6680 self.raise_error("Expected TYPE after CAST") 6681 elif isinstance(to, exp.Identifier): 6682 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6683 elif to.this == exp.DataType.Type.CHAR: 6684 if self._match(TokenType.CHARACTER_SET): 6685 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6686 6687 return self.build_cast( 6688 strict=strict, 6689 this=this, 6690 to=to, 6691 format=fmt, 6692 safe=safe, 6693 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6694 default=default, 6695 ) 6696 6697 def _parse_string_agg(self) -> exp.GroupConcat: 6698 if self._match(TokenType.DISTINCT): 6699 args: t.List[t.Optional[exp.Expression]] = [ 6700 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6701 ] 6702 if self._match(TokenType.COMMA): 6703 args.extend(self._parse_csv(self._parse_assignment)) 6704 else: 6705 args = self._parse_csv(self._parse_assignment) # type: ignore 6706 6707 if self._match_text_seq("ON", "OVERFLOW"): 6708 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6709 if self._match_text_seq("ERROR"): 6710 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6711 else: 6712 self._match_text_seq("TRUNCATE") 6713 on_overflow = self.expression( 6714 exp.OverflowTruncateBehavior, 6715 this=self._parse_string(), 6716 with_count=( 6717 self._match_text_seq("WITH", "COUNT") 6718 or not self._match_text_seq("WITHOUT", "COUNT") 6719 ), 6720 ) 6721 else: 6722 on_overflow = None 6723 6724 index = self._index 6725 if not self._match(TokenType.R_PAREN) and args: 6726 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6727 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6728 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6729 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6730 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6731 6732 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6733 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6734 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
6735 if not self._match_text_seq("WITHIN", "GROUP"): 6736 self._retreat(index) 6737 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6738 6739 # The corresponding match_r_paren will be called in parse_function (caller) 6740 self._match_l_paren() 6741 6742 return self.expression( 6743 exp.GroupConcat, 6744 this=self._parse_order(this=seq_get(args, 0)), 6745 separator=seq_get(args, 1), 6746 on_overflow=on_overflow, 6747 ) 6748 6749 def _parse_convert( 6750 self, strict: bool, safe: t.Optional[bool] = None 6751 ) -> t.Optional[exp.Expression]: 6752 this = self._parse_bitwise() 6753 6754 if self._match(TokenType.USING): 6755 to: t.Optional[exp.Expression] = self.expression( 6756 exp.CharacterSet, this=self._parse_var() 6757 ) 6758 elif self._match(TokenType.COMMA): 6759 to = self._parse_types() 6760 else: 6761 to = None 6762 6763 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6764 6765 def _parse_xml_table(self) -> exp.XMLTable: 6766 namespaces = None 6767 passing = None 6768 columns = None 6769 6770 if self._match_text_seq("XMLNAMESPACES", "("): 6771 namespaces = self._parse_xml_namespace() 6772 self._match_text_seq(")", ",") 6773 6774 this = self._parse_string() 6775 6776 if self._match_text_seq("PASSING"): 6777 # The BY VALUE keywords are optional and are provided for semantic clarity 6778 self._match_text_seq("BY", "VALUE") 6779 passing = self._parse_csv(self._parse_column) 6780 6781 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6782 6783 if self._match_text_seq("COLUMNS"): 6784 columns = self._parse_csv(self._parse_field_def) 6785 6786 return self.expression( 6787 exp.XMLTable, 6788 this=this, 6789 namespaces=namespaces, 6790 passing=passing, 6791 columns=columns, 6792 by_ref=by_ref, 6793 ) 6794 6795 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6796 namespaces = [] 6797 6798 while True: 6799 if self._match(TokenType.DEFAULT): 6800 uri = self._parse_string() 6801 else: 6802 uri = self._parse_alias(self._parse_string()) 6803 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6804 if not self._match(TokenType.COMMA): 6805 break 6806 6807 return namespaces 6808 6809 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6810 args = self._parse_csv(self._parse_assignment) 6811 6812 if len(args) < 3: 6813 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6814 6815 return self.expression(exp.DecodeCase, expressions=args) 6816 6817 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6818 self._match_text_seq("KEY") 6819 key = self._parse_column() 6820 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6821 self._match_text_seq("VALUE") 6822 value = self._parse_bitwise() 6823 6824 if not key and not value: 6825 return None 6826 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6827 6828 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6829 if not this or not self._match_text_seq("FORMAT", "JSON"): 6830 return this 6831 6832 return self.expression(exp.FormatJson, this=this) 6833 6834 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6835 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6836 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6837 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6838 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6839 else: 6840 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6841 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6842 6843 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6844 6845 if not empty and not error and not null: 6846 return None 6847 6848 return self.expression( 6849 exp.OnCondition, 6850 empty=empty, 6851 error=error, 6852 null=null, 6853 ) 6854 6855 def _parse_on_handling( 6856 self, on: str, *values: str 6857 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6858 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6859 for value in values: 6860 if self._match_text_seq(value, "ON", on): 6861 return f"{value} ON {on}" 6862 6863 index = self._index 6864 if self._match(TokenType.DEFAULT): 6865 default_value = self._parse_bitwise() 6866 if self._match_text_seq("ON", on): 6867 return default_value 6868 6869 self._retreat(index) 6870 6871 return None 6872 6873 @t.overload 6874 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6875 6876 @t.overload 6877 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6878 6879 def _parse_json_object(self, agg=False): 6880 star = self._parse_star() 6881 expressions = ( 6882 [star] 6883 if star 6884 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6885 ) 6886 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6887 6888 unique_keys = None 6889 if self._match_text_seq("WITH", "UNIQUE"): 6890 unique_keys = True 6891 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6892 unique_keys = False 6893 6894 self._match_text_seq("KEYS") 6895 6896 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6897 self._parse_type() 6898 ) 6899 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6900 6901 return self.expression( 6902 exp.JSONObjectAgg if agg else exp.JSONObject, 6903 expressions=expressions, 6904 null_handling=null_handling, 6905 unique_keys=unique_keys, 6906 return_type=return_type, 6907 encoding=encoding, 6908 ) 6909 6910 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6911 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6912 if not self._match_text_seq("NESTED"): 6913 this = self._parse_id_var() 6914 kind = self._parse_types(allow_identifiers=False) 6915 nested = None 6916 else: 6917 this = None 6918 kind = None 6919 nested = True 6920 6921 path = self._match_text_seq("PATH") and self._parse_string() 6922 nested_schema = nested and self._parse_json_schema() 6923 6924 return self.expression( 6925 exp.JSONColumnDef, 6926 this=this, 6927 kind=kind, 6928 path=path, 6929 nested_schema=nested_schema, 6930 ) 6931 6932 def _parse_json_schema(self) -> exp.JSONSchema: 6933 self._match_text_seq("COLUMNS") 6934 return self.expression( 6935 exp.JSONSchema, 6936 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6937 ) 6938 6939 def _parse_json_table(self) -> exp.JSONTable: 6940 this = self._parse_format_json(self._parse_bitwise()) 6941 path = self._match(TokenType.COMMA) and self._parse_string() 6942 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6943 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6944 schema = 
self._parse_json_schema() 6945 6946 return exp.JSONTable( 6947 this=this, 6948 schema=schema, 6949 path=path, 6950 error_handling=error_handling, 6951 empty_handling=empty_handling, 6952 ) 6953 6954 def _parse_match_against(self) -> exp.MatchAgainst: 6955 if self._match_text_seq("TABLE"): 6956 # parse SingleStore MATCH(TABLE ...) syntax 6957 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 6958 expressions = [] 6959 table = self._parse_table() 6960 if table: 6961 expressions = [table] 6962 else: 6963 expressions = self._parse_csv(self._parse_column) 6964 6965 self._match_text_seq(")", "AGAINST", "(") 6966 6967 this = self._parse_string() 6968 6969 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6970 modifier = "IN NATURAL LANGUAGE MODE" 6971 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6972 modifier = f"{modifier} WITH QUERY EXPANSION" 6973 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6974 modifier = "IN BOOLEAN MODE" 6975 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6976 modifier = "WITH QUERY EXPANSION" 6977 else: 6978 modifier = None 6979 6980 return self.expression( 6981 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6982 ) 6983 6984 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6985 def _parse_open_json(self) -> exp.OpenJSON: 6986 this = self._parse_bitwise() 6987 path = self._match(TokenType.COMMA) and self._parse_string() 6988 6989 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6990 this = self._parse_field(any_token=True) 6991 kind = self._parse_types() 6992 path = self._parse_string() 6993 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6994 6995 return self.expression( 6996 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6997 ) 6998 6999 expressions = None 7000 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 7001 self._match_l_paren() 7002 expressions = self._parse_csv(_parse_open_json_column_def) 7003 7004 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 7005 7006 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 7007 args = self._parse_csv(self._parse_bitwise) 7008 7009 if self._match(TokenType.IN): 7010 return self.expression( 7011 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 7012 ) 7013 7014 if haystack_first: 7015 haystack = seq_get(args, 0) 7016 needle = seq_get(args, 1) 7017 else: 7018 haystack = seq_get(args, 1) 7019 needle = seq_get(args, 0) 7020 7021 return self.expression( 7022 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 7023 ) 7024 7025 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 7026 args = self._parse_csv(self._parse_table) 7027 return exp.JoinHint(this=func_name.upper(), expressions=args) 7028 7029 def _parse_substring(self) -> exp.Substring: 7030 # Postgres supports the form: substring(string [from int] [for int]) 7031 # (despite being undocumented, the reverse order also works) 7032 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 7033 7034 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 7035 7036 start, length = None, None 7037 7038 while self._curr: 7039 if self._match(TokenType.FROM): 7040 start = self._parse_bitwise() 7041 elif self._match(TokenType.FOR): 7042 if not start: 7043 start = exp.Literal.number(1) 7044 length = self._parse_bitwise() 7045 
else: 7046 break 7047 7048 if start: 7049 args.append(start) 7050 if length: 7051 args.append(length) 7052 7053 return self.validate_expression(exp.Substring.from_arg_list(args), args) 7054 7055 def _parse_trim(self) -> exp.Trim: 7056 # https://www.w3resource.com/sql/character-functions/trim.php 7057 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 7058 7059 position = None 7060 collation = None 7061 expression = None 7062 7063 if self._match_texts(self.TRIM_TYPES): 7064 position = self._prev.text.upper() 7065 7066 this = self._parse_bitwise() 7067 if self._match_set((TokenType.FROM, TokenType.COMMA)): 7068 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 7069 expression = self._parse_bitwise() 7070 7071 if invert_order: 7072 this, expression = expression, this 7073 7074 if self._match(TokenType.COLLATE): 7075 collation = self._parse_bitwise() 7076 7077 return self.expression( 7078 exp.Trim, this=this, position=position, expression=expression, collation=collation 7079 ) 7080 7081 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 7082 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 7083 7084 def _parse_named_window(self) -> t.Optional[exp.Expression]: 7085 return self._parse_window(self._parse_id_var(), alias=True) 7086 7087 def _parse_respect_or_ignore_nulls( 7088 self, this: t.Optional[exp.Expression] 7089 ) -> t.Optional[exp.Expression]: 7090 if self._match_text_seq("IGNORE", "NULLS"): 7091 return self.expression(exp.IgnoreNulls, this=this) 7092 if self._match_text_seq("RESPECT", "NULLS"): 7093 return self.expression(exp.RespectNulls, this=this) 7094 return this 7095 7096 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7097 if self._match(TokenType.HAVING): 7098 self._match_texts(("MAX", "MIN")) 7099 max = self._prev.text.upper() != "MIN" 7100 return self.expression( 7101 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7102 ) 7103 7104 return this 7105 7106 def _parse_window( 7107 self, this: t.Optional[exp.Expression], alias: bool = False 7108 ) -> t.Optional[exp.Expression]: 7109 func = this 7110 comments = func.comments if isinstance(func, exp.Expression) else None 7111 7112 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7113 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7114 if self._match_text_seq("WITHIN", "GROUP"): 7115 order = self._parse_wrapped(self._parse_order) 7116 this = self.expression(exp.WithinGroup, this=this, expression=order) 7117 7118 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7119 self._match(TokenType.WHERE) 7120 this = self.expression( 7121 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7122 ) 7123 self._match_r_paren() 7124 7125 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7126 # Some dialects choose to implement and some do not. 7127 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7128 7129 # There is some code above in _parse_lambda that handles 7130 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7131 7132 # The below changes handle 7133 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
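# ---------------------------------------------------------------------------
# Illustrative aside (editor's sketch, not part of parser.py): the two
# IGNORE NULLS placements described in the comments above end up with the
# same IgnoreNulls wrapper in the parsed tree. Example SQL only, parsed with
# the default dialect, which accepts both spellings.
import sqlglot
from sqlglot import exp

inside = sqlglot.parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t")
outside = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")

# Both variants contain an exp.IgnoreNulls node wrapping the aggregate.
assert inside.find(exp.IgnoreNulls) is not None
assert outside.find(exp.IgnoreNulls) is not None
# ---------------------------------------------------------------------------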
7134 7135 # Oracle allows both formats 7136 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7137 # and Snowflake chose to do the same for familiarity 7138 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7139 if isinstance(this, exp.AggFunc): 7140 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7141 7142 if ignore_respect and ignore_respect is not this: 7143 ignore_respect.replace(ignore_respect.this) 7144 this = self.expression(ignore_respect.__class__, this=this) 7145 7146 this = self._parse_respect_or_ignore_nulls(this) 7147 7148 # bigquery select from window x AS (partition by ...) 7149 if alias: 7150 over = None 7151 self._match(TokenType.ALIAS) 7152 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7153 return this 7154 else: 7155 over = self._prev.text.upper() 7156 7157 if comments and isinstance(func, exp.Expression): 7158 func.pop_comments() 7159 7160 if not self._match(TokenType.L_PAREN): 7161 return self.expression( 7162 exp.Window, 7163 comments=comments, 7164 this=this, 7165 alias=self._parse_id_var(False), 7166 over=over, 7167 ) 7168 7169 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7170 7171 first = self._match(TokenType.FIRST) 7172 if self._match_text_seq("LAST"): 7173 first = False 7174 7175 partition, order = self._parse_partition_and_order() 7176 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7177 7178 if kind: 7179 self._match(TokenType.BETWEEN) 7180 start = self._parse_window_spec() 7181 7182 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7183 exclude = ( 7184 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7185 if self._match_text_seq("EXCLUDE") 7186 else None 7187 ) 7188 7189 spec = self.expression( 7190 exp.WindowSpec, 7191 kind=kind, 7192 start=start["value"], 7193 start_side=start["side"], 7194 end=end.get("value"), 7195 end_side=end.get("side"), 7196 exclude=exclude, 7197 ) 7198 else: 7199 spec = None 7200 7201 self._match_r_paren() 7202 7203 window = self.expression( 7204 exp.Window, 7205 comments=comments, 7206 this=this, 7207 partition_by=partition, 7208 order=order, 7209 spec=spec, 7210 alias=window_alias, 7211 over=over, 7212 first=first, 7213 ) 7214 7215 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
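# ---------------------------------------------------------------------------
# Illustrative aside (editor's sketch, not part of parser.py): the shape of
# the exp.Window / exp.WindowSpec built above. Example SQL only; the printed
# values mirror the kwargs used in this method (kind, start_side, end) and
# are indicative rather than guaranteed across versions.
import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one(
    "SELECT SUM(x) OVER (PARTITION BY g ORDER BY y "
    "ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) FROM t"
)
spec = ast.find(exp.Window).args.get("spec")

assert isinstance(spec, exp.WindowSpec)
print(spec.args.get("kind"))        # "ROWS"
print(spec.args.get("start_side"))  # "PRECEDING"
print(spec.args.get("end"))         # "CURRENT ROW"
# Note: the check just below re-enters _parse_window, which is how Oracle's
# `aggregate KEEP (...) OVER (...)` chains into a nested window.
# ---------------------------------------------------------------------------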
7216 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7217 return self._parse_window(window, alias=alias) 7218 7219 return window 7220 7221 def _parse_partition_and_order( 7222 self, 7223 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7224 return self._parse_partition_by(), self._parse_order() 7225 7226 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7227 self._match(TokenType.BETWEEN) 7228 7229 return { 7230 "value": ( 7231 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7232 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7233 or self._parse_type() 7234 ), 7235 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7236 } 7237 7238 def _parse_alias( 7239 self, this: t.Optional[exp.Expression], explicit: bool = False 7240 ) -> t.Optional[exp.Expression]: 7241 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7242 # so this section tries to parse the clause version and if it fails, it treats the token 7243 # as an identifier (alias) 7244 if self._can_parse_limit_or_offset(): 7245 return this 7246 7247 any_token = self._match(TokenType.ALIAS) 7248 comments = self._prev_comments or [] 7249 7250 if explicit and not any_token: 7251 return this 7252 7253 if self._match(TokenType.L_PAREN): 7254 aliases = self.expression( 7255 exp.Aliases, 7256 comments=comments, 7257 this=this, 7258 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7259 ) 7260 self._match_r_paren(aliases) 7261 return aliases 7262 7263 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7264 self.STRING_ALIASES and self._parse_string_as_identifier() 7265 ) 7266 7267 if alias: 7268 comments.extend(alias.pop_comments()) 7269 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7270 column = this.this 7271 7272 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7273 if not this.comments and column and column.comments: 7274 this.comments = column.pop_comments() 7275 7276 return this 7277 7278 def _parse_id_var( 7279 self, 7280 any_token: bool = True, 7281 tokens: t.Optional[t.Collection[TokenType]] = None, 7282 ) -> t.Optional[exp.Expression]: 7283 expression = self._parse_identifier() 7284 if not expression and ( 7285 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7286 ): 7287 quoted = self._prev.token_type == TokenType.STRING 7288 expression = self._identifier_expression(quoted=quoted) 7289 7290 return expression 7291 7292 def _parse_string(self) -> t.Optional[exp.Expression]: 7293 if self._match_set(self.STRING_PARSERS): 7294 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7295 return self._parse_placeholder() 7296 7297 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7298 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7299 if output: 7300 output.update_positions(self._prev) 7301 return output 7302 7303 def _parse_number(self) -> t.Optional[exp.Expression]: 7304 if self._match_set(self.NUMERIC_PARSERS): 7305 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7306 return self._parse_placeholder() 7307 7308 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7309 if self._match(TokenType.IDENTIFIER): 7310 return self._identifier_expression(quoted=True) 7311 return self._parse_placeholder() 7312 7313 def _parse_var( 7314 self, 7315 any_token: bool = False, 7316 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7317 upper: bool = False, 7318 ) -> t.Optional[exp.Expression]: 7319 if ( 7320 (any_token and self._advance_any()) 7321 or self._match(TokenType.VAR) 7322 or (self._match_set(tokens) if tokens else False) 7323 ): 7324 return self.expression( 7325 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7326 ) 7327 return self._parse_placeholder() 7328 7329 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7330 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7331 self._advance() 7332 return self._prev 7333 return None 7334 7335 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7336 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7337 7338 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7339 return self._parse_primary() or self._parse_var(any_token=True) 7340 7341 def _parse_null(self) -> t.Optional[exp.Expression]: 7342 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 7343 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7344 return self._parse_placeholder() 7345 7346 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7347 if self._match(TokenType.TRUE): 7348 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7349 if self._match(TokenType.FALSE): 7350 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7351 return self._parse_placeholder() 7352 7353 def _parse_star(self) -> t.Optional[exp.Expression]: 7354 if self._match(TokenType.STAR): 7355 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7356 return self._parse_placeholder() 7357 7358 def _parse_parameter(self) -> exp.Parameter: 7359 this = self._parse_identifier() or self._parse_primary_or_var() 7360 return self.expression(exp.Parameter, this=this) 7361 7362 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7363 if self._match_set(self.PLACEHOLDER_PARSERS): 7364 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7365 if placeholder: 7366 return placeholder 7367 self._advance(-1) 7368 return None 7369 7370 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7371 if not self._match_texts(keywords): 7372 return None 7373 if self._match(TokenType.L_PAREN, advance=False): 7374 return self._parse_wrapped_csv(self._parse_expression) 7375 7376 expression = self._parse_alias(self._parse_assignment(), explicit=True) 7377 return [expression] if expression else None 7378 7379 def _parse_csv( 7380 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7381 ) -> t.List[exp.Expression]: 7382 parse_result = parse_method() 7383 items = [parse_result] if parse_result is not None else [] 7384 7385 while self._match(sep): 7386 self._add_comments(parse_result) 7387 parse_result = parse_method() 7388 if parse_result is not None: 7389 items.append(parse_result) 7390 7391 return items 7392 7393 def _parse_tokens( 7394 self, parse_method: t.Callable, expressions: t.Dict 7395 ) -> t.Optional[exp.Expression]: 7396 this = parse_method() 7397 7398 while self._match_set(expressions): 7399 this = self.expression( 7400 expressions[self._prev.token_type], 7401 this=this, 7402 comments=self._prev_comments, 7403 expression=parse_method(), 7404 ) 7405 7406 return this 7407 7408 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7409 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7410 7411 def 
_parse_wrapped_csv( 7412 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7413 ) -> t.List[exp.Expression]: 7414 return self._parse_wrapped( 7415 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7416 ) 7417 7418 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7419 wrapped = self._match(TokenType.L_PAREN) 7420 if not wrapped and not optional: 7421 self.raise_error("Expecting (") 7422 parse_result = parse_method() 7423 if wrapped: 7424 self._match_r_paren() 7425 return parse_result 7426 7427 def _parse_expressions(self) -> t.List[exp.Expression]: 7428 return self._parse_csv(self._parse_expression) 7429 7430 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7431 return ( 7432 self._parse_set_operations( 7433 self._parse_alias(self._parse_assignment(), explicit=True) 7434 if alias 7435 else self._parse_assignment() 7436 ) 7437 or self._parse_select() 7438 ) 7439 7440 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7441 return self._parse_query_modifiers( 7442 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7443 ) 7444 7445 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7446 this = None 7447 if self._match_texts(self.TRANSACTION_KIND): 7448 this = self._prev.text 7449 7450 self._match_texts(("TRANSACTION", "WORK")) 7451 7452 modes = [] 7453 while True: 7454 mode = [] 7455 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7456 mode.append(self._prev.text) 7457 7458 if mode: 7459 modes.append(" ".join(mode)) 7460 if not self._match(TokenType.COMMA): 7461 break 7462 7463 return self.expression(exp.Transaction, this=this, modes=modes) 7464 7465 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7466 chain = None 7467 savepoint = None 7468 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7469 7470 self._match_texts(("TRANSACTION", "WORK")) 7471 7472 if self._match_text_seq("TO"): 7473 self._match_text_seq("SAVEPOINT") 7474 savepoint = self._parse_id_var() 7475 7476 if self._match(TokenType.AND): 7477 chain = not self._match_text_seq("NO") 7478 self._match_text_seq("CHAIN") 7479 7480 if is_rollback: 7481 return self.expression(exp.Rollback, savepoint=savepoint) 7482 7483 return self.expression(exp.Commit, chain=chain) 7484 7485 def _parse_refresh(self) -> exp.Refresh: 7486 self._match(TokenType.TABLE) 7487 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7488 7489 def _parse_column_def_with_exists(self): 7490 start = self._index 7491 self._match(TokenType.COLUMN) 7492 7493 exists_column = self._parse_exists(not_=True) 7494 expression = self._parse_field_def() 7495 7496 if not isinstance(expression, exp.ColumnDef): 7497 self._retreat(start) 7498 return None 7499 7500 expression.set("exists", exists_column) 7501 7502 return expression 7503 7504 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7505 if not self._prev.text.upper() == "ADD": 7506 return None 7507 7508 expression = self._parse_column_def_with_exists() 7509 if not expression: 7510 return None 7511 7512 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7513 if self._match_texts(("FIRST", "AFTER")): 7514 position = self._prev.text 7515 column_position = self.expression( 7516 exp.ColumnPosition, this=self._parse_column(), position=position 7517 ) 7518 expression.set("position", column_position) 7519 7520 return 
expression 7521 7522 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7523 drop = self._match(TokenType.DROP) and self._parse_drop() 7524 if drop and not isinstance(drop, exp.Command): 7525 drop.set("kind", drop.args.get("kind", "COLUMN")) 7526 return drop 7527 7528 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7529 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7530 return self.expression( 7531 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7532 ) 7533 7534 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7535 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7536 self._match_text_seq("ADD") 7537 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7538 return self.expression( 7539 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7540 ) 7541 7542 column_def = self._parse_add_column() 7543 if isinstance(column_def, exp.ColumnDef): 7544 return column_def 7545 7546 exists = self._parse_exists(not_=True) 7547 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7548 return self.expression( 7549 exp.AddPartition, 7550 exists=exists, 7551 this=self._parse_field(any_token=True), 7552 location=self._match_text_seq("LOCATION", advance=False) 7553 and self._parse_property(), 7554 ) 7555 7556 return None 7557 7558 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7559 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7560 or self._match_text_seq("COLUMNS") 7561 ): 7562 schema = self._parse_schema() 7563 7564 return ( 7565 ensure_list(schema) 7566 if schema 7567 else self._parse_csv(self._parse_column_def_with_exists) 7568 ) 7569 7570 return self._parse_csv(_parse_add_alteration) 7571 7572 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7573 if self._match_texts(self.ALTER_ALTER_PARSERS): 7574 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7575 7576 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7577 # keyword after ALTER we default to parsing this statement 7578 self._match(TokenType.COLUMN) 7579 column = self._parse_field(any_token=True) 7580 7581 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7582 return self.expression(exp.AlterColumn, this=column, drop=True) 7583 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7584 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7585 if self._match(TokenType.COMMENT): 7586 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7587 if self._match_text_seq("DROP", "NOT", "NULL"): 7588 return self.expression( 7589 exp.AlterColumn, 7590 this=column, 7591 drop=True, 7592 allow_null=True, 7593 ) 7594 if self._match_text_seq("SET", "NOT", "NULL"): 7595 return self.expression( 7596 exp.AlterColumn, 7597 this=column, 7598 allow_null=False, 7599 ) 7600 7601 if self._match_text_seq("SET", "VISIBLE"): 7602 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7603 if self._match_text_seq("SET", "INVISIBLE"): 7604 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7605 7606 self._match_text_seq("SET", "DATA") 7607 self._match_text_seq("TYPE") 7608 return self.expression( 7609 exp.AlterColumn, 7610 this=column, 7611 dtype=self._parse_types(), 7612 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7613 using=self._match(TokenType.USING) and 
self._parse_assignment(), 7614 ) 7615 7616 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7617 if self._match_texts(("ALL", "EVEN", "AUTO")): 7618 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7619 7620 self._match_text_seq("KEY", "DISTKEY") 7621 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7622 7623 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7624 if compound: 7625 self._match_text_seq("SORTKEY") 7626 7627 if self._match(TokenType.L_PAREN, advance=False): 7628 return self.expression( 7629 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7630 ) 7631 7632 self._match_texts(("AUTO", "NONE")) 7633 return self.expression( 7634 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7635 ) 7636 7637 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7638 index = self._index - 1 7639 7640 partition_exists = self._parse_exists() 7641 if self._match(TokenType.PARTITION, advance=False): 7642 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7643 7644 self._retreat(index) 7645 return self._parse_csv(self._parse_drop_column) 7646 7647 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7648 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7649 exists = self._parse_exists() 7650 old_column = self._parse_column() 7651 to = self._match_text_seq("TO") 7652 new_column = self._parse_column() 7653 7654 if old_column is None or to is None or new_column is None: 7655 return None 7656 7657 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7658 7659 self._match_text_seq("TO") 7660 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7661 7662 def _parse_alter_table_set(self) -> exp.AlterSet: 7663 alter_set = self.expression(exp.AlterSet) 7664 7665 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7666 "TABLE", "PROPERTIES" 7667 ): 7668 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7669 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7670 alter_set.set("expressions", [self._parse_assignment()]) 7671 elif self._match_texts(("LOGGED", "UNLOGGED")): 7672 alter_set.set("option", exp.var(self._prev.text.upper())) 7673 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7674 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7675 elif self._match_text_seq("LOCATION"): 7676 alter_set.set("location", self._parse_field()) 7677 elif self._match_text_seq("ACCESS", "METHOD"): 7678 alter_set.set("access_method", self._parse_field()) 7679 elif self._match_text_seq("TABLESPACE"): 7680 alter_set.set("tablespace", self._parse_field()) 7681 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7682 alter_set.set("file_format", [self._parse_field()]) 7683 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7684 alter_set.set("file_format", self._parse_wrapped_options()) 7685 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7686 alter_set.set("copy_options", self._parse_wrapped_options()) 7687 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7688 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7689 else: 7690 if self._match_text_seq("SERDE"): 7691 alter_set.set("serde", self._parse_field()) 7692 7693 properties = 
self._parse_wrapped(self._parse_properties, optional=True) 7694 alter_set.set("expressions", [properties]) 7695 7696 return alter_set 7697 7698 def _parse_alter_session(self) -> exp.AlterSession: 7699 """Parse ALTER SESSION SET/UNSET statements.""" 7700 if self._match(TokenType.SET): 7701 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7702 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7703 7704 self._match_text_seq("UNSET") 7705 expressions = self._parse_csv( 7706 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 7707 ) 7708 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 7709 7710 def _parse_alter(self) -> exp.Alter | exp.Command: 7711 start = self._prev 7712 7713 alter_token = self._match_set(self.ALTERABLES) and self._prev 7714 if not alter_token: 7715 return self._parse_as_command(start) 7716 7717 exists = self._parse_exists() 7718 only = self._match_text_seq("ONLY") 7719 7720 if alter_token.token_type == TokenType.SESSION: 7721 this = None 7722 check = None 7723 cluster = None 7724 else: 7725 this = self._parse_table(schema=True) 7726 check = self._match_text_seq("WITH", "CHECK") 7727 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7728 7729 if self._next: 7730 self._advance() 7731 7732 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7733 if parser: 7734 actions = ensure_list(parser(self)) 7735 not_valid = self._match_text_seq("NOT", "VALID") 7736 options = self._parse_csv(self._parse_property) 7737 7738 if not self._curr and actions: 7739 return self.expression( 7740 exp.Alter, 7741 this=this, 7742 kind=alter_token.text.upper(), 7743 exists=exists, 7744 actions=actions, 7745 only=only, 7746 options=options, 7747 cluster=cluster, 7748 not_valid=not_valid, 7749 check=check, 7750 ) 7751 7752 return self._parse_as_command(start) 7753 7754 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7755 start = self._prev 7756 # https://duckdb.org/docs/sql/statements/analyze 7757 if not self._curr: 7758 return self.expression(exp.Analyze) 7759 7760 options = [] 7761 while self._match_texts(self.ANALYZE_STYLES): 7762 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7763 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7764 else: 7765 options.append(self._prev.text.upper()) 7766 7767 this: t.Optional[exp.Expression] = None 7768 inner_expression: t.Optional[exp.Expression] = None 7769 7770 kind = self._curr and self._curr.text.upper() 7771 7772 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7773 this = self._parse_table_parts() 7774 elif self._match_text_seq("TABLES"): 7775 if self._match_set((TokenType.FROM, TokenType.IN)): 7776 kind = f"{kind} {self._prev.text.upper()}" 7777 this = self._parse_table(schema=True, is_db_reference=True) 7778 elif self._match_text_seq("DATABASE"): 7779 this = self._parse_table(schema=True, is_db_reference=True) 7780 elif self._match_text_seq("CLUSTER"): 7781 this = self._parse_table() 7782 # Try matching inner expr keywords before fallback to parse table. 
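# ---------------------------------------------------------------------------
# Illustrative aside (editor's sketch, not part of parser.py): _parse_alter
# above only returns a structured exp.Alter when a registered action parser
# consumes the whole statement; anything unrecognized degrades to a raw
# exp.Command. Example SQL only; the set of supported actions depends on the
# dialect and sqlglot version.
import sqlglot
from sqlglot import exp

parsed = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT")
assert isinstance(parsed, exp.Alter)
print(parsed.args.get("kind"))                                      # "TABLE"
print([type(a).__name__ for a in parsed.args.get("actions", [])])   # e.g. ['ColumnDef']
# ---------------------------------------------------------------------------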
7783 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7784 kind = None 7785 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7786 else: 7787 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7788 kind = None 7789 this = self._parse_table_parts() 7790 7791 partition = self._try_parse(self._parse_partition) 7792 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7793 return self._parse_as_command(start) 7794 7795 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7796 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7797 "WITH", "ASYNC", "MODE" 7798 ): 7799 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7800 else: 7801 mode = None 7802 7803 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7804 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7805 7806 properties = self._parse_properties() 7807 return self.expression( 7808 exp.Analyze, 7809 kind=kind, 7810 this=this, 7811 mode=mode, 7812 partition=partition, 7813 properties=properties, 7814 expression=inner_expression, 7815 options=options, 7816 ) 7817 7818 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7819 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7820 this = None 7821 kind = self._prev.text.upper() 7822 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7823 expressions = [] 7824 7825 if not self._match_text_seq("STATISTICS"): 7826 self.raise_error("Expecting token STATISTICS") 7827 7828 if self._match_text_seq("NOSCAN"): 7829 this = "NOSCAN" 7830 elif self._match(TokenType.FOR): 7831 if self._match_text_seq("ALL", "COLUMNS"): 7832 this = "FOR ALL COLUMNS" 7833 if self._match_texts("COLUMNS"): 7834 this = "FOR COLUMNS" 7835 expressions = self._parse_csv(self._parse_column_reference) 7836 elif self._match_text_seq("SAMPLE"): 7837 sample = self._parse_number() 7838 expressions = [ 7839 self.expression( 7840 exp.AnalyzeSample, 7841 sample=sample, 7842 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7843 ) 7844 ] 7845 7846 return self.expression( 7847 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7848 ) 7849 7850 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7851 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7852 kind = None 7853 this = None 7854 expression: t.Optional[exp.Expression] = None 7855 if self._match_text_seq("REF", "UPDATE"): 7856 kind = "REF" 7857 this = "UPDATE" 7858 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7859 this = "UPDATE SET DANGLING TO NULL" 7860 elif self._match_text_seq("STRUCTURE"): 7861 kind = "STRUCTURE" 7862 if self._match_text_seq("CASCADE", "FAST"): 7863 this = "CASCADE FAST" 7864 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7865 ("ONLINE", "OFFLINE") 7866 ): 7867 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7868 expression = self._parse_into() 7869 7870 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7871 7872 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7873 this = self._prev.text.upper() 7874 if self._match_text_seq("COLUMNS"): 7875 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7876 return None 7877 7878 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7879 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7880 if self._match_text_seq("STATISTICS"): 7881 return self.expression(exp.AnalyzeDelete, kind=kind) 7882 return None 7883 7884 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7885 if self._match_text_seq("CHAINED", "ROWS"): 7886 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7887 return None 7888 7889 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7890 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7891 this = self._prev.text.upper() 7892 expression: t.Optional[exp.Expression] = None 7893 expressions = [] 7894 update_options = None 7895 7896 if self._match_text_seq("HISTOGRAM", "ON"): 7897 expressions = self._parse_csv(self._parse_column_reference) 7898 with_expressions = [] 7899 while self._match(TokenType.WITH): 7900 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7901 if self._match_texts(("SYNC", "ASYNC")): 7902 if self._match_text_seq("MODE", advance=False): 7903 with_expressions.append(f"{self._prev.text.upper()} MODE") 7904 self._advance() 7905 else: 7906 buckets = self._parse_number() 7907 if self._match_text_seq("BUCKETS"): 7908 with_expressions.append(f"{buckets} BUCKETS") 7909 if with_expressions: 7910 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7911 7912 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7913 TokenType.UPDATE, advance=False 7914 ): 7915 update_options = self._prev.text.upper() 7916 self._advance() 7917 elif self._match_text_seq("USING", "DATA"): 7918 expression = self.expression(exp.UsingData, this=self._parse_string()) 7919 7920 return self.expression( 7921 exp.AnalyzeHistogram, 7922 this=this, 7923 expressions=expressions, 7924 expression=expression, 7925 update_options=update_options, 7926 ) 7927 7928 def _parse_merge(self) -> exp.Merge: 7929 self._match(TokenType.INTO) 7930 target = self._parse_table() 7931 7932 if target and self._match(TokenType.ALIAS, advance=False): 7933 target.set("alias", self._parse_table_alias()) 7934 7935 self._match(TokenType.USING) 7936 using = self._parse_table() 7937 7938 self._match(TokenType.ON) 7939 on = self._parse_assignment() 7940 7941 return self.expression( 7942 exp.Merge, 7943 this=target, 7944 using=using, 7945 on=on, 7946 whens=self._parse_when_matched(), 7947 returning=self._parse_returning(), 7948 ) 7949 7950 def _parse_when_matched(self) -> exp.Whens: 7951 whens = [] 7952 7953 while self._match(TokenType.WHEN): 7954 matched = not self._match(TokenType.NOT) 7955 self._match_text_seq("MATCHED") 7956 source = ( 7957 False 7958 if self._match_text_seq("BY", "TARGET") 7959 else self._match_text_seq("BY", "SOURCE") 7960 ) 7961 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7962 7963 self._match(TokenType.THEN) 7964 7965 if self._match(TokenType.INSERT): 7966 this = self._parse_star() 7967 if this: 7968 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7969 else: 7970 then = self.expression( 7971 exp.Insert, 7972 this=exp.var("ROW") 7973 if self._match_text_seq("ROW") 7974 else self._parse_value(values=False), 7975 expression=self._match_text_seq("VALUES") and self._parse_value(), 7976 ) 7977 elif self._match(TokenType.UPDATE): 7978 expressions = self._parse_star() 7979 if expressions: 7980 then = self.expression(exp.Update, expressions=expressions) 7981 else: 7982 then = self.expression( 7983 exp.Update, 7984 
expressions=self._match(TokenType.SET) 7985 and self._parse_csv(self._parse_equality), 7986 ) 7987 elif self._match(TokenType.DELETE): 7988 then = self.expression(exp.Var, this=self._prev.text) 7989 else: 7990 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7991 7992 whens.append( 7993 self.expression( 7994 exp.When, 7995 matched=matched, 7996 source=source, 7997 condition=condition, 7998 then=then, 7999 ) 8000 ) 8001 return self.expression(exp.Whens, expressions=whens) 8002 8003 def _parse_show(self) -> t.Optional[exp.Expression]: 8004 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 8005 if parser: 8006 return parser(self) 8007 return self._parse_as_command(self._prev) 8008 8009 def _parse_set_item_assignment( 8010 self, kind: t.Optional[str] = None 8011 ) -> t.Optional[exp.Expression]: 8012 index = self._index 8013 8014 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 8015 return self._parse_set_transaction(global_=kind == "GLOBAL") 8016 8017 left = self._parse_primary() or self._parse_column() 8018 assignment_delimiter = self._match_texts(("=", "TO")) 8019 8020 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 8021 self._retreat(index) 8022 return None 8023 8024 right = self._parse_statement() or self._parse_id_var() 8025 if isinstance(right, (exp.Column, exp.Identifier)): 8026 right = exp.var(right.name) 8027 8028 this = self.expression(exp.EQ, this=left, expression=right) 8029 return self.expression(exp.SetItem, this=this, kind=kind) 8030 8031 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 8032 self._match_text_seq("TRANSACTION") 8033 characteristics = self._parse_csv( 8034 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 8035 ) 8036 return self.expression( 8037 exp.SetItem, 8038 expressions=characteristics, 8039 kind="TRANSACTION", 8040 **{"global": global_}, # type: ignore 8041 ) 8042 8043 def _parse_set_item(self) -> t.Optional[exp.Expression]: 8044 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 8045 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 8046 8047 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 8048 index = self._index 8049 set_ = self.expression( 8050 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 8051 ) 8052 8053 if self._curr: 8054 self._retreat(index) 8055 return self._parse_as_command(self._prev) 8056 8057 return set_ 8058 8059 def _parse_var_from_options( 8060 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 8061 ) -> t.Optional[exp.Var]: 8062 start = self._curr 8063 if not start: 8064 return None 8065 8066 option = start.text.upper() 8067 continuations = options.get(option) 8068 8069 index = self._index 8070 self._advance() 8071 for keywords in continuations or []: 8072 if isinstance(keywords, str): 8073 keywords = (keywords,) 8074 8075 if self._match_text_seq(*keywords): 8076 option = f"{option} {' '.join(keywords)}" 8077 break 8078 else: 8079 if continuations or continuations is None: 8080 if raise_unmatched: 8081 self.raise_error(f"Unknown option {option}") 8082 8083 self._retreat(index) 8084 return None 8085 8086 return exp.var(option) 8087 8088 def _parse_as_command(self, start: Token) -> exp.Command: 8089 while self._curr: 8090 self._advance() 8091 text = self._find_sql(start, self._prev) 8092 size = len(start.text) 8093 self._warn_unsupported() 8094 return exp.Command(this=text[:size], 
expression=text[size:]) 8095 8096 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8097 settings = [] 8098 8099 self._match_l_paren() 8100 kind = self._parse_id_var() 8101 8102 if self._match(TokenType.L_PAREN): 8103 while True: 8104 key = self._parse_id_var() 8105 value = self._parse_primary() 8106 if not key and value is None: 8107 break 8108 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8109 self._match(TokenType.R_PAREN) 8110 8111 self._match_r_paren() 8112 8113 return self.expression( 8114 exp.DictProperty, 8115 this=this, 8116 kind=kind.this if kind else None, 8117 settings=settings, 8118 ) 8119 8120 def _parse_dict_range(self, this: str) -> exp.DictRange: 8121 self._match_l_paren() 8122 has_min = self._match_text_seq("MIN") 8123 if has_min: 8124 min = self._parse_var() or self._parse_primary() 8125 self._match_text_seq("MAX") 8126 max = self._parse_var() or self._parse_primary() 8127 else: 8128 max = self._parse_var() or self._parse_primary() 8129 min = exp.Literal.number(0) 8130 self._match_r_paren() 8131 return self.expression(exp.DictRange, this=this, min=min, max=max) 8132 8133 def _parse_comprehension( 8134 self, this: t.Optional[exp.Expression] 8135 ) -> t.Optional[exp.Comprehension]: 8136 index = self._index 8137 expression = self._parse_column() 8138 if not self._match(TokenType.IN): 8139 self._retreat(index - 1) 8140 return None 8141 iterator = self._parse_column() 8142 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8143 return self.expression( 8144 exp.Comprehension, 8145 this=this, 8146 expression=expression, 8147 iterator=iterator, 8148 condition=condition, 8149 ) 8150 8151 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8152 if self._match(TokenType.HEREDOC_STRING): 8153 return self.expression(exp.Heredoc, this=self._prev.text) 8154 8155 if not self._match_text_seq("$"): 8156 return None 8157 8158 tags = ["$"] 8159 tag_text = None 8160 8161 if self._is_connected(): 8162 self._advance() 8163 tags.append(self._prev.text.upper()) 8164 else: 8165 self.raise_error("No closing $ found") 8166 8167 if tags[-1] != "$": 8168 if self._is_connected() and self._match_text_seq("$"): 8169 tag_text = tags[-1] 8170 tags.append("$") 8171 else: 8172 self.raise_error("No closing $ found") 8173 8174 heredoc_start = self._curr 8175 8176 while self._curr: 8177 if self._match_text_seq(*tags, advance=False): 8178 this = self._find_sql(heredoc_start, self._prev) 8179 self._advance(len(tags)) 8180 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8181 8182 self._advance() 8183 8184 self.raise_error(f"No closing {''.join(tags)} found") 8185 return None 8186 8187 def _find_parser( 8188 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8189 ) -> t.Optional[t.Callable]: 8190 if not self._curr: 8191 return None 8192 8193 index = self._index 8194 this = [] 8195 while True: 8196 # The current token might be multiple words 8197 curr = self._curr.text.upper() 8198 key = curr.split(" ") 8199 this.append(curr) 8200 8201 self._advance() 8202 result, trie = in_trie(trie, key) 8203 if result == TrieResult.FAILED: 8204 break 8205 8206 if result == TrieResult.EXISTS: 8207 subparser = parsers[" ".join(this)] 8208 return subparser 8209 8210 self._retreat(index) 8211 return None 8212 8213 def _match(self, token_type, advance=True, expression=None): 8214 if not self._curr: 8215 return None 8216 8217 if self._curr.token_type == token_type: 8218 if advance: 8219 self._advance() 8220 self._add_comments(expression) 8221 return 
True 8222 8223 return None 8224 8225 def _match_set(self, types, advance=True): 8226 if not self._curr: 8227 return None 8228 8229 if self._curr.token_type in types: 8230 if advance: 8231 self._advance() 8232 return True 8233 8234 return None 8235 8236 def _match_pair(self, token_type_a, token_type_b, advance=True): 8237 if not self._curr or not self._next: 8238 return None 8239 8240 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8241 if advance: 8242 self._advance(2) 8243 return True 8244 8245 return None 8246 8247 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8248 if not self._match(TokenType.L_PAREN, expression=expression): 8249 self.raise_error("Expecting (") 8250 8251 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8252 if not self._match(TokenType.R_PAREN, expression=expression): 8253 self.raise_error("Expecting )") 8254 8255 def _match_texts(self, texts, advance=True): 8256 if ( 8257 self._curr 8258 and self._curr.token_type != TokenType.STRING 8259 and self._curr.text.upper() in texts 8260 ): 8261 if advance: 8262 self._advance() 8263 return True 8264 return None 8265 8266 def _match_text_seq(self, *texts, advance=True): 8267 index = self._index 8268 for text in texts: 8269 if ( 8270 self._curr 8271 and self._curr.token_type != TokenType.STRING 8272 and self._curr.text.upper() == text 8273 ): 8274 self._advance() 8275 else: 8276 self._retreat(index) 8277 return None 8278 8279 if not advance: 8280 self._retreat(index) 8281 8282 return True 8283 8284 def _replace_lambda( 8285 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8286 ) -> t.Optional[exp.Expression]: 8287 if not node: 8288 return node 8289 8290 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8291 8292 for column in node.find_all(exp.Column): 8293 typ = lambda_types.get(column.parts[0].name) 8294 if typ is not None: 8295 dot_or_id = column.to_dot() if column.table else column.this 8296 8297 if typ: 8298 dot_or_id = self.expression( 8299 exp.Cast, 8300 this=dot_or_id, 8301 to=typ, 8302 ) 8303 8304 parent = column.parent 8305 8306 while isinstance(parent, exp.Dot): 8307 if not isinstance(parent.parent, exp.Dot): 8308 parent.replace(dot_or_id) 8309 break 8310 parent = parent.parent 8311 else: 8312 if column is node: 8313 node = dot_or_id 8314 else: 8315 column.replace(dot_or_id) 8316 return node 8317 8318 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8319 start = self._prev 8320 8321 # Not to be confused with TRUNCATE(number, decimals) function call 8322 if self._match(TokenType.L_PAREN): 8323 self._retreat(self._index - 2) 8324 return self._parse_function() 8325 8326 # Clickhouse supports TRUNCATE DATABASE as well 8327 is_database = self._match(TokenType.DATABASE) 8328 8329 self._match(TokenType.TABLE) 8330 8331 exists = self._parse_exists(not_=False) 8332 8333 expressions = self._parse_csv( 8334 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8335 ) 8336 8337 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8338 8339 if self._match_text_seq("RESTART", "IDENTITY"): 8340 identity = "RESTART" 8341 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8342 identity = "CONTINUE" 8343 else: 8344 identity = None 8345 8346 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8347 option = self._prev.text 8348 else: 8349 option = None 8350 8351 partition = self._parse_partition() 
8352 8353 # Fallback case 8354 if self._curr: 8355 return self._parse_as_command(start) 8356 8357 return self.expression( 8358 exp.TruncateTable, 8359 expressions=expressions, 8360 is_database=is_database, 8361 exists=exists, 8362 cluster=cluster, 8363 identity=identity, 8364 option=option, 8365 partition=partition, 8366 ) 8367 8368 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8369 this = self._parse_ordered(self._parse_opclass) 8370 8371 if not self._match(TokenType.WITH): 8372 return this 8373 8374 op = self._parse_var(any_token=True) 8375 8376 return self.expression(exp.WithOperator, this=this, op=op) 8377 8378 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8379 self._match(TokenType.EQ) 8380 self._match(TokenType.L_PAREN) 8381 8382 opts: t.List[t.Optional[exp.Expression]] = [] 8383 option: exp.Expression | None 8384 while self._curr and not self._match(TokenType.R_PAREN): 8385 if self._match_text_seq("FORMAT_NAME", "="): 8386 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8387 option = self._parse_format_name() 8388 else: 8389 option = self._parse_property() 8390 8391 if option is None: 8392 self.raise_error("Unable to parse option") 8393 break 8394 8395 opts.append(option) 8396 8397 return opts 8398 8399 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8400 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8401 8402 options = [] 8403 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8404 option = self._parse_var(any_token=True) 8405 prev = self._prev.text.upper() 8406 8407 # Different dialects might separate options and values by white space, "=" and "AS" 8408 self._match(TokenType.EQ) 8409 self._match(TokenType.ALIAS) 8410 8411 param = self.expression(exp.CopyParameter, this=option) 8412 8413 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8414 TokenType.L_PAREN, advance=False 8415 ): 8416 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8417 param.set("expressions", self._parse_wrapped_options()) 8418 elif prev == "FILE_FORMAT": 8419 # T-SQL's external file format case 8420 param.set("expression", self._parse_field()) 8421 else: 8422 param.set("expression", self._parse_unquoted_field()) 8423 8424 options.append(param) 8425 self._match(sep) 8426 8427 return options 8428 8429 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8430 expr = self.expression(exp.Credentials) 8431 8432 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8433 expr.set("storage", self._parse_field()) 8434 if self._match_text_seq("CREDENTIALS"): 8435 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8436 creds = ( 8437 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8438 ) 8439 expr.set("credentials", creds) 8440 if self._match_text_seq("ENCRYPTION"): 8441 expr.set("encryption", self._parse_wrapped_options()) 8442 if self._match_text_seq("IAM_ROLE"): 8443 expr.set("iam_role", self._parse_field()) 8444 if self._match_text_seq("REGION"): 8445 expr.set("region", self._parse_field()) 8446 8447 return expr 8448 8449 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8450 return self._parse_field() 8451 8452 def _parse_copy(self) -> exp.Copy | exp.Command: 8453 start = self._prev 8454 8455 self._match(TokenType.INTO) 8456 8457 this = ( 8458 self._parse_select(nested=True, parse_subquery_alias=False) 8459 if self._match(TokenType.L_PAREN, advance=False) 8460 else self._parse_table(schema=True) 
8461 ) 8462 8463 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8464 8465 files = self._parse_csv(self._parse_file_location) 8466 if self._match(TokenType.EQ, advance=False): 8467 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 8468 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 8469 # list via `_parse_wrapped(..)` below. 8470 self._advance(-1) 8471 files = [] 8472 8473 credentials = self._parse_credentials() 8474 8475 self._match_text_seq("WITH") 8476 8477 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8478 8479 # Fallback case 8480 if self._curr: 8481 return self._parse_as_command(start) 8482 8483 return self.expression( 8484 exp.Copy, 8485 this=this, 8486 kind=kind, 8487 credentials=credentials, 8488 files=files, 8489 params=params, 8490 ) 8491 8492 def _parse_normalize(self) -> exp.Normalize: 8493 return self.expression( 8494 exp.Normalize, 8495 this=self._parse_bitwise(), 8496 form=self._match(TokenType.COMMA) and self._parse_var(), 8497 ) 8498 8499 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8500 args = self._parse_csv(lambda: self._parse_lambda()) 8501 8502 this = seq_get(args, 0) 8503 decimals = seq_get(args, 1) 8504 8505 return expr_type( 8506 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8507 ) 8508 8509 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8510 star_token = self._prev 8511 8512 if self._match_text_seq("COLUMNS", "(", advance=False): 8513 this = self._parse_function() 8514 if isinstance(this, exp.Columns): 8515 this.set("unpack", True) 8516 return this 8517 8518 return self.expression( 8519 exp.Star, 8520 **{ # type: ignore 8521 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8522 "replace": self._parse_star_op("REPLACE"), 8523 "rename": self._parse_star_op("RENAME"), 8524 }, 8525 ).update_positions(star_token) 8526 8527 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8528 privilege_parts = [] 8529 8530 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8531 # (end of privilege list) or L_PAREN (start of column list) are met 8532 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8533 privilege_parts.append(self._curr.text.upper()) 8534 self._advance() 8535 8536 this = exp.var(" ".join(privilege_parts)) 8537 expressions = ( 8538 self._parse_wrapped_csv(self._parse_column) 8539 if self._match(TokenType.L_PAREN, advance=False) 8540 else None 8541 ) 8542 8543 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8544 8545 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8546 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8547 principal = self._parse_id_var() 8548 8549 if not principal: 8550 return None 8551 8552 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8553 8554 def _parse_grant_revoke_common( 8555 self, 8556 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8557 privileges = self._parse_csv(self._parse_grant_privilege) 8558 8559 self._match(TokenType.ON) 8560 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8561 8562 # Attempt to parse the securable e.g. 
MySQL allows names 8563 # such as "foo.*", "*.*" which are not easily parseable yet 8564 securable = self._try_parse(self._parse_table_parts) 8565 8566 return privileges, kind, securable 8567 8568 def _parse_grant(self) -> exp.Grant | exp.Command: 8569 start = self._prev 8570 8571 privileges, kind, securable = self._parse_grant_revoke_common() 8572 8573 if not securable or not self._match_text_seq("TO"): 8574 return self._parse_as_command(start) 8575 8576 principals = self._parse_csv(self._parse_grant_principal) 8577 8578 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8579 8580 if self._curr: 8581 return self._parse_as_command(start) 8582 8583 return self.expression( 8584 exp.Grant, 8585 privileges=privileges, 8586 kind=kind, 8587 securable=securable, 8588 principals=principals, 8589 grant_option=grant_option, 8590 ) 8591 8592 def _parse_revoke(self) -> exp.Revoke | exp.Command: 8593 start = self._prev 8594 8595 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 8596 8597 privileges, kind, securable = self._parse_grant_revoke_common() 8598 8599 if not securable or not self._match_text_seq("FROM"): 8600 return self._parse_as_command(start) 8601 8602 principals = self._parse_csv(self._parse_grant_principal) 8603 8604 cascade = None 8605 if self._match_texts(("CASCADE", "RESTRICT")): 8606 cascade = self._prev.text.upper() 8607 8608 if self._curr: 8609 return self._parse_as_command(start) 8610 8611 return self.expression( 8612 exp.Revoke, 8613 privileges=privileges, 8614 kind=kind, 8615 securable=securable, 8616 principals=principals, 8617 grant_option=grant_option, 8618 cascade=cascade, 8619 ) 8620 8621 def _parse_overlay(self) -> exp.Overlay: 8622 return self.expression( 8623 exp.Overlay, 8624 **{ # type: ignore 8625 "this": self._parse_bitwise(), 8626 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8627 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8628 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8629 }, 8630 ) 8631 8632 def _parse_format_name(self) -> exp.Property: 8633 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8634 # for FILE_FORMAT = <format_name> 8635 return self.expression( 8636 exp.Property, 8637 this=exp.var("FORMAT_NAME"), 8638 value=self._parse_string() or self._parse_table_parts(), 8639 ) 8640 8641 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8642 args: t.List[exp.Expression] = [] 8643 8644 if self._match(TokenType.DISTINCT): 8645 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8646 self._match(TokenType.COMMA) 8647 8648 args.extend(self._parse_csv(self._parse_assignment)) 8649 8650 return self.expression( 8651 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8652 ) 8653 8654 def _identifier_expression( 8655 self, token: t.Optional[Token] = None, **kwargs: t.Any 8656 ) -> exp.Identifier: 8657 token = token or self._prev 8658 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8659 expression.update_positions(token) 8660 return expression 8661 8662 def _build_pipe_cte( 8663 self, 8664 query: exp.Query, 8665 expressions: t.List[exp.Expression], 8666 alias_cte: t.Optional[exp.TableAlias] = None, 8667 ) -> exp.Select: 8668 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8669 if alias_cte: 8670 new_cte = alias_cte 8671 else: 8672 self._pipe_cte_counter += 1 8673 new_cte = f"__tmp{self._pipe_cte_counter}" 8674 8675 with_ = 
query.args.get("with") 8676 ctes = with_.pop() if with_ else None 8677 8678 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8679 if ctes: 8680 new_select.set("with", ctes) 8681 8682 return new_select.with_(new_cte, as_=query, copy=False) 8683 8684 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8685 select = self._parse_select(consume_pipe=False) 8686 if not select: 8687 return query 8688 8689 return self._build_pipe_cte( 8690 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8691 ) 8692 8693 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8694 limit = self._parse_limit() 8695 offset = self._parse_offset() 8696 if limit: 8697 curr_limit = query.args.get("limit", limit) 8698 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8699 query.limit(limit, copy=False) 8700 if offset: 8701 curr_offset = query.args.get("offset") 8702 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8703 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8704 8705 return query 8706 8707 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8708 this = self._parse_assignment() 8709 if self._match_text_seq("GROUP", "AND", advance=False): 8710 return this 8711 8712 this = self._parse_alias(this) 8713 8714 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8715 return self._parse_ordered(lambda: this) 8716 8717 return this 8718 8719 def _parse_pipe_syntax_aggregate_group_order_by( 8720 self, query: exp.Select, group_by_exists: bool = True 8721 ) -> exp.Select: 8722 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8723 aggregates_or_groups, orders = [], [] 8724 for element in expr: 8725 if isinstance(element, exp.Ordered): 8726 this = element.this 8727 if isinstance(this, exp.Alias): 8728 element.set("this", this.args["alias"]) 8729 orders.append(element) 8730 else: 8731 this = element 8732 aggregates_or_groups.append(this) 8733 8734 if group_by_exists: 8735 query.select(*aggregates_or_groups, copy=False).group_by( 8736 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8737 copy=False, 8738 ) 8739 else: 8740 query.select(*aggregates_or_groups, append=False, copy=False) 8741 8742 if orders: 8743 return query.order_by(*orders, append=False, copy=False) 8744 8745 return query 8746 8747 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8748 self._match_text_seq("AGGREGATE") 8749 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8750 8751 if self._match(TokenType.GROUP_BY) or ( 8752 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8753 ): 8754 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8755 8756 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8757 8758 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8759 first_setop = self.parse_set_operation(this=query) 8760 if not first_setop: 8761 return None 8762 8763 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8764 expr = self._parse_paren() 8765 return expr.assert_is(exp.Subquery).unnest() if expr else None 8766 8767 first_setop.this.pop() 8768 8769 setops = [ 8770 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8771 *self._parse_csv(_parse_and_unwrap_query), 8772 ] 8773 8774 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8775 
with_ = query.args.get("with") 8776 ctes = with_.pop() if with_ else None 8777 8778 if isinstance(first_setop, exp.Union): 8779 query = query.union(*setops, copy=False, **first_setop.args) 8780 elif isinstance(first_setop, exp.Except): 8781 query = query.except_(*setops, copy=False, **first_setop.args) 8782 else: 8783 query = query.intersect(*setops, copy=False, **first_setop.args) 8784 8785 query.set("with", ctes) 8786 8787 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8788 8789 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8790 join = self._parse_join() 8791 if not join: 8792 return None 8793 8794 if isinstance(query, exp.Select): 8795 return query.join(join, copy=False) 8796 8797 return query 8798 8799 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8800 pivots = self._parse_pivots() 8801 if not pivots: 8802 return query 8803 8804 from_ = query.args.get("from") 8805 if from_: 8806 from_.this.set("pivots", pivots) 8807 8808 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8809 8810 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8811 self._match_text_seq("EXTEND") 8812 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8813 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8814 8815 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8816 sample = self._parse_table_sample() 8817 8818 with_ = query.args.get("with") 8819 if with_: 8820 with_.expressions[-1].this.set("sample", sample) 8821 else: 8822 query.set("sample", sample) 8823 8824 return query 8825 8826 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8827 if isinstance(query, exp.Subquery): 8828 query = exp.select("*").from_(query, copy=False) 8829 8830 if not query.args.get("from"): 8831 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8832 8833 while self._match(TokenType.PIPE_GT): 8834 start = self._curr 8835 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8836 if not parser: 8837 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8838 # keywords, making it tricky to disambiguate them without lookahead. The approach 8839 # here is to try and parse a set operation and if that fails, then try to parse a 8840 # join operator. If that fails as well, then the operator is not supported. 
8841 parsed_query = self._parse_pipe_syntax_set_operator(query) 8842 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8843 if not parsed_query: 8844 self._retreat(start) 8845 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8846 break 8847 query = parsed_query 8848 else: 8849 query = parser(self, query) 8850 8851 return query 8852 8853 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8854 vars = self._parse_csv(self._parse_id_var) 8855 if not vars: 8856 return None 8857 8858 return self.expression( 8859 exp.DeclareItem, 8860 this=vars, 8861 kind=self._parse_types(), 8862 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8863 ) 8864 8865 def _parse_declare(self) -> exp.Declare | exp.Command: 8866 start = self._prev 8867 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8868 8869 if not expressions or self._curr: 8870 return self._parse_as_command(start) 8871 8872 return self.expression(exp.Declare, expressions=expressions) 8873 8874 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8875 exp_class = exp.Cast if strict else exp.TryCast 8876 8877 if exp_class == exp.TryCast: 8878 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8879 8880 return self.expression(exp_class, **kwargs) 8881 8882 def _parse_json_value(self) -> exp.JSONValue: 8883 this = self._parse_bitwise() 8884 self._match(TokenType.COMMA) 8885 path = self._parse_bitwise() 8886 8887 returning = self._match(TokenType.RETURNING) and self._parse_type() 8888 8889 return self.expression( 8890 exp.JSONValue, 8891 this=this, 8892 path=self.dialect.to_json_path(path), 8893 returning=returning, 8894 on_condition=self._parse_on_condition(), 8895 ) 8896 8897 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8898 def concat_exprs( 8899 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8900 ) -> exp.Expression: 8901 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 8902 concat_exprs = [ 8903 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8904 ] 8905 node.set("expressions", concat_exprs) 8906 return node 8907 if len(exprs) == 1: 8908 return exprs[0] 8909 return self.expression(exp.Concat, expressions=args, safe=True) 8910 8911 args = self._parse_csv(self._parse_lambda) 8912 8913 if args: 8914 order = args[-1] if isinstance(args[-1], exp.Order) else None 8915 8916 if order: 8917 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8918 # remove 'expr' from exp.Order and add it back to args 8919 args[-1] = order.this 8920 order.set("this", concat_exprs(order.this, args)) 8921 8922 this = order or concat_exprs(args[0], args) 8923 else: 8924 this = None 8925 8926 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8927 8928 return self.expression(exp.GroupConcat, this=this, separator=separator)
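As a rough illustration of the GROUP_CONCAT handling above (the example is not part of this module, and node shapes can vary across sqlglot versions), a MySQL call with DISTINCT, ORDER BY and SEPARATOR ends up as a single exp.GroupConcat node:

    import sqlglot
    from sqlglot import exp

    # MySQL-style GROUP_CONCAT with DISTINCT, ORDER BY and an explicit SEPARATOR
    tree = sqlglot.parse_one(
        "SELECT GROUP_CONCAT(DISTINCT a ORDER BY b SEPARATOR '|') FROM t", read="mysql"
    )

    gc = tree.find(exp.GroupConcat)
    print(gc.this)                   # the aggregated expression (ORDER BY wrapping DISTINCT a)
    print(gc.args.get("separator"))  # the '|' string literal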
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
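For orientation, here is a minimal sketch of constructing a Parser directly and driving it with the low-level tokenize/parse flow; in practice most callers go through sqlglot.parse or sqlglot.parse_one, which wire this up for the chosen dialect:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a, b FROM t WHERE a > 1"

    # Tokenize with the base tokenizer, then hand the tokens to a Parser instance
    tokens = Tokenizer().tokenize(sql)
    parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=5)

    (tree,) = parser.parse(tokens, sql)
    print(tree.sql())  # round-trips the statement back to SQL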
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
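A short sketch of parse() on a multi-statement script, producing one syntax tree per statement:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    script = "CREATE TABLE t (a INT); INSERT INTO t VALUES (1); SELECT a FROM t"

    parser = Parser()
    trees = parser.parse(Tokenizer().tokenize(script), script)

    for tree in trees:
        print(type(tree).__name__)  # Create, Insert, Select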
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
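The same mechanism is exposed through the top-level API via the "into" argument, which delegates to parse_into; a small sketch:

    import sqlglot
    from sqlglot import exp

    # Parse a full statement into a specific node type
    select = sqlglot.parse_one("SELECT a FROM t", into=exp.Select)
    assert isinstance(select, exp.Select)

    # Fragments can be parsed directly into narrower node types as well
    where = sqlglot.parse_one("WHERE x > 1", into=exp.Where)
    assert isinstance(where, exp.Where)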
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
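A sketch of how the error level changes this behavior, assuming the statement below (missing a closing parenthesis) does trigger a parse error:

    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    bad_sql = "SELECT foo FROM (SELECT baz FROM t"

    # WARN: errors are collected on parser.errors and logged, nothing is raised
    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize(bad_sql), bad_sql)
    print(len(parser.errors))  # at least 1

    # RAISE: check_errors() bundles the collected errors into a single ParseError
    try:
        Parser(error_level=ErrorLevel.RAISE).parse(Tokenizer().tokenize(bad_sql), bad_sql)
    except ParseError as e:
        print(e)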
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
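The structured fields passed to ParseError.new above surface on the raised exception, so callers can inspect them programmatically; a sketch:

    import sqlglot
    from sqlglot.errors import ParseError

    try:
        sqlglot.parse_one("SELECT foo FROM (SELECT baz FROM t")
    except ParseError as e:
        error = e.errors[0]
        print(error["description"])         # Expecting )
        print(error["line"], error["col"])  # position of the offending token
        print(error["highlight"])           # the token underlined in the message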
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
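A hypothetical sketch of building a node through this helper outside of a dialect; in real code it is called from the _parse_* methods, which also supply token and comment context:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()

    # expression() instantiates the class, attaches pending comments and validates the node
    column = parser.expression(
        exp.Column, this=exp.to_identifier("a"), table=exp.to_identifier("t")
    )
    print(column.sql())  # t.a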
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
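A sketch of validation reporting a missing mandatory argument (assuming exp.Not still declares "this" as required); with the default ErrorLevel.IMMEDIATE the message is raised right away:

    from sqlglot import exp
    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser

    parser = Parser()  # error_level defaults to ErrorLevel.IMMEDIATE

    try:
        parser.validate_expression(exp.Not())  # exp.Not requires a "this" argument
    except ParseError as e:
        print(e.errors[0]["description"])  # Required keyword: 'this' missing for ...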
    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )
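For reference, a small sketch of what this routine produces for a plain UNION ALL, checked through the public API:

    import sqlglot
    from sqlglot import exp

    tree = sqlglot.parse_one("SELECT a FROM x UNION ALL SELECT a FROM y")

    assert isinstance(tree, exp.Union)
    print(tree.args["distinct"])  # False, because ALL was given explicitly
    print(tree.this.sql())        # SELECT a FROM x
    print(tree.expression.sql())  # SELECT a FROM y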